Skip to content

Commit 01d1785

Browse files
authored
Merge pull request #258 from ikawaha/develop
Develop
2 parents 72c7bf2 + 7908e86 commit 01d1785

File tree

10 files changed

+237
-52
lines changed

10 files changed

+237
-52
lines changed

.github/workflows/go.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ jobs:
1414
fail-fast: false
1515
matrix:
1616
os: [ubuntu-latest, macos-latest, windows-latest]
17-
go: ['1.16.x', '1.17.x']
17+
go: ['1.17.x', '1.18.x']
1818
runs-on: ${{ matrix.os }}
1919

2020
steps:

.github/workflows/release.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ jobs:
1818
- name: Set up Go
1919
uses: actions/setup-go@v3
2020
with:
21-
go-version: '1.17.x'
21+
go-version: '1.18.x'
2222
- name: Run GoReleaser
2323
uses: goreleaser/goreleaser-action@v3
2424
with:

app.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"name": "kagome",
33
"description": "Kagome Japanese Morphological Analyzer",
44
"env": {
5-
"GOVERSION": "go1.17",
5+
"GOVERSION": "go1.18",
66
"GO_INSTALL_PACKAGE_SPEC": "."
77
},
88
"buildpacks": [

go.mod

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
module github.com/ikawaha/kagome/v2
22

3-
// +heroku goVersion go1.17
4-
go 1.17
3+
// +heroku goVersion go1.18
4+
go 1.18
55

66
require (
77
github.com/ikawaha/kagome-dict v1.0.4

tokenizer/lattice/lattice.go

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,8 @@ var latticePool = sync.Pool{
4141
// Lattice represents a grid of morph nodes.
4242
type Lattice struct {
4343
Input string
44-
Output []*node
45-
list [][]*node
44+
Output []*Node
45+
list [][]*Node
4646
dic *dict.Dict
4747
udic *dict.UserDict
4848
}
@@ -86,15 +86,15 @@ func (la *Lattice) addNode(pos, id, position, start int, class NodeClass, surfac
8686
case USER:
8787
// use default cost
8888
}
89-
n := newNode()
89+
n := nodePool.Get().(*Node)
9090
n.ID = id
9191
n.Position = position
9292
n.Start = start
9393
n.Class = class
9494
n.Cost = 0
9595
n.Left, n.Right, n.Weight = int32(m.LeftID), int32(m.RightID), int32(m.Weight)
9696
n.Surface = surface
97-
n.Prev = nil
97+
n.prev = nil
9898
p := pos + utf8.RuneCountInString(surface)
9999
la.list[p] = append(la.list[p], n)
100100
}
@@ -105,8 +105,7 @@ func (la *Lattice) Build(inp string) {
105105
rc := utf8.RuneCountInString(inp)
106106
la.Input = inp
107107
if cap(la.list) < rc+2 {
108-
const expandRatio = 2
109-
la.list = make([][]*node, 0, (rc+2)*expandRatio)
108+
la.list = make([][]*Node, 0, rc+2)
110109
}
111110
la.list = la.list[0 : rc+2]
112111

@@ -201,7 +200,7 @@ func kanjiOnly(s string) bool {
201200
return s != ""
202201
}
203202

204-
func additionalCost(n *node) int {
203+
func additionalCost(n *Node) int {
205204
l := utf8.RuneCountInString(n.Surface)
206205
if l > searchModeKanjiLength && kanjiOnly(n.Surface) {
207206
return (l - searchModeKanjiLength) * searchModeKanjiPenalty
@@ -236,7 +235,7 @@ func (la *Lattice) Forward(m TokenizeMode) {
236235
}
237236
if j == 0 || int32(totalCost) < la.list[i][index].Cost {
238237
la.list[i][index].Cost = int32(totalCost)
239-
la.list[i][index].Prev = la.list[target.Start][j]
238+
la.list[i][index].prev = la.list[target.Start][j]
240239
}
241240
}
242241
}
@@ -245,26 +244,20 @@ func (la *Lattice) Forward(m TokenizeMode) {
245244

246245
// Backward runs backward algorithm of the Viterbi.
247246
func (la *Lattice) Backward(m TokenizeMode) {
248-
const bufferExpandRatio = 2
249247
size := len(la.list)
250248
if size == 0 {
251249
return
252250
}
253-
if cap(la.Output) < size {
254-
la.Output = make([]*node, 0, size*bufferExpandRatio)
255-
} else {
256-
la.Output = la.Output[:0]
257-
}
258-
for p := la.list[size-1][0]; p != nil; p = p.Prev {
251+
for p := la.list[size-1][0]; p != nil; p = p.prev {
259252
if m != Extended || p.Class != UNKNOWN {
260253
la.Output = append(la.Output, p)
261254
continue
262255
}
263256
runeLen := utf8.RuneCountInString(p.Surface)
264-
stack := make([]*node, 0, runeLen)
257+
stack := make([]*Node, 0, runeLen)
265258
i := 0
266259
for k, r := range p.Surface {
267-
stack = append(stack, &node{
260+
stack = append(stack, &Node{
268261
ID: p.ID,
269262
Start: p.Start + i,
270263
Class: DUMMY,
@@ -279,7 +272,7 @@ func (la *Lattice) Backward(m TokenizeMode) {
279272
}
280273
}
281274

282-
func posFeature(d *dict.Dict, u *dict.UserDict, t *node) string {
275+
func posFeature(d *dict.Dict, u *dict.UserDict, t *Node) string {
283276
var ret []string
284277
switch t.Class {
285278
case KNOWN:
@@ -318,13 +311,13 @@ func posFeature(d *dict.Dict, u *dict.UserDict, t *node) string {
318311
// Dot outputs a lattice in the graphviz dot format.
319312
//nolint:gocyclo
320313
func (la *Lattice) Dot(w io.Writer) {
321-
bests := make(map[*node]struct{})
314+
bests := make(map[*Node]struct{})
322315
for _, n := range la.Output {
323316
bests[n] = struct{}{}
324317
}
325318
type edge struct {
326-
from *node
327-
to *node
319+
from *Node
320+
to *Node
328321
}
329322
edges := make([]edge, 0, 1024)
330323
for i, size := 1, len(la.list); i < size; i++ {

tokenizer/lattice/lattice_test.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ func Test_LatticeBuildEmptyInput(t *testing.T) {
2121
if la.Input != inp {
2222
t.Errorf("got %v, expected %v", la.Input, inp)
2323
}
24-
boseos := node{ID: -1}
24+
boseos := Node{ID: -1}
2525
if len(la.list) != 2 {
2626
t.Errorf("lattice initialize error: got %v, expected has 2 eos/bos nodes", la.list)
2727
} else if len(la.list[0]) != 1 || *la.list[0][0] != boseos {
@@ -53,8 +53,8 @@ func Test_LatticeBuild(t *testing.T) {
5353
if la.Input != inp {
5454
t.Errorf("got %v, expected %v", la.Input, inp)
5555
}
56-
bos := node{ID: -1}
57-
eos := node{ID: -1, Position: len(inp), Start: 1}
56+
bos := Node{ID: -1}
57+
eos := Node{ID: -1, Position: len(inp), Start: 1}
5858
if len(la.list) != 3 {
5959
t.Errorf("lattice initialize error: got %v, expected has 2 eos/bos nodes", la.list)
6060
} else if len(la.list[0]) != 1 || *la.list[0][0] != bos {
@@ -133,8 +133,8 @@ func Test_LatticeBuildUnknown(t *testing.T) {
133133
if la.Input != inp {
134134
t.Errorf("got %v, expected %v", la.Input, inp)
135135
}
136-
bos := node{ID: -1}
137-
eos := node{ID: -1, Position: len(inp), Start: 3}
136+
bos := Node{ID: -1}
137+
eos := Node{ID: -1, Position: len(inp), Start: 3}
138138
if len(la.list) != 5 {
139139
t.Errorf("lattice initialize error: got %v, expected has 2 eos/bos nodes", la.list)
140140
} else if len(la.list[0]) != 1 || *la.list[0][0] != bos {
@@ -216,8 +216,8 @@ func Test_LatticeBuildInvalidInput(t *testing.T) {
216216
if la.Input != inp {
217217
t.Errorf("got %v, expected %v", la.Input, inp)
218218
}
219-
bos := node{ID: -1}
220-
eos := node{ID: -1, Position: len(inp), Start: 4}
219+
bos := Node{ID: -1}
220+
eos := Node{ID: -1, Position: len(inp), Start: 4}
221221
if len(la.list) != 6 {
222222
t.Errorf("lattice initialize error: got %v, expected has 2 eos/bos nodes", la.list)
223223
} else if len(la.list[0]) != 1 || *la.list[0][0] != bos {

tokenizer/lattice/node.go

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ func (nc NodeClass) String() string {
3131
return "UNDEF"
3232
}
3333

34-
type node struct {
34+
// Node is a lattice node.
35+
type Node struct {
3536
ID int
3637
Position int // byte position
3738
Start int // rune position
@@ -41,15 +42,11 @@ type node struct {
4142
Right int32
4243
Weight int32
4344
Surface string
44-
Prev *node
45+
prev *Node
4546
}
4647

4748
var nodePool = sync.Pool{
4849
New: func() interface{} {
49-
return new(node)
50+
return new(Node)
5051
},
5152
}
52-
53-
func newNode() *node {
54-
return nodePool.Get().(*node)
55-
}

tokenizer/lattice/node_test.go

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,20 +5,19 @@ import (
55
)
66

77
func Test_NodeClassString(t *testing.T) {
8-
pairs := []struct {
9-
in NodeClass
10-
out string
8+
testdata := []struct {
9+
class NodeClass
10+
want string
1111
}{
12-
{DUMMY, "DUMMY"},
13-
{KNOWN, "KNOWN"},
14-
{UNKNOWN, "UNKNOWN"},
15-
{USER, "USER"},
16-
{NodeClass(999), "UNDEF"},
12+
{class: DUMMY, want: "DUMMY"},
13+
{class: KNOWN, want: "KNOWN"},
14+
{class: UNKNOWN, want: "UNKNOWN"},
15+
{class: USER, want: "USER"},
16+
{class: NodeClass(999), want: "UNDEF"},
1717
}
18-
19-
for _, p := range pairs {
20-
if p.in.String() != p.out {
21-
t.Errorf("got %v, expected %v", p.in.String(), p.out)
18+
for _, p := range testdata {
19+
if got := p.class.String(); got != p.want {
20+
t.Errorf("got %v, expected %v", got, p.want)
2221
}
2322
}
2423
}

tokenizer/token.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,29 @@ func (t Token) POS() []string {
165165
return nil
166166
}
167167

168+
// EqualFeatures reports whether t and tt have identical feature lists, as decided by the package-level EqualFeatures applied to t.Features() and tt.Features().
169+
func (t Token) EqualFeatures(tt Token) bool {
170+
return EqualFeatures(t.Features(), tt.Features())
171+
}
172+
173+
// EqualPOS returns true, if the POSs of tokens are equal.
174+
func (t Token) EqualPOS(tt Token) bool {
175+
return EqualFeatures(t.POS(), tt.POS())
176+
}
177+
178+
// EqualFeatures reports whether lhs and rhs have the same length and equal strings at every index; a nil slice and an empty slice compare equal.
179+
func EqualFeatures(lhs, rhs []string) bool {
180+
if len(lhs) != len(rhs) {
181+
return false
182+
}
183+
for i := 0; i < len(lhs); i++ {
184+
if lhs[i] != rhs[i] {
185+
return false
186+
}
187+
}
188+
return true
189+
}
190+
168191
// InflectionalType returns the inflectional type feature if exists.
169192
func (t Token) InflectionalType() (string, bool) {
170193
return t.pickupFromFeatures(dict.InflectionalType)

0 commit comments

Comments
 (0)