-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathstrsim.go
97 lines (81 loc) · 2.05 KB
/
strsim.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package strsim
import (
"fmt"
"sort"
"strings"
"unicode"
)
// Compare returns a fraction between 0 and 1, which indicates the degree of
// similarity between the two strings. 0 indicates completely different strings,
// 1 indicates identical strings. The comparison is case-sensitive.
//
// Both inputs are passed through stripSpaces first, so whitespace does not
// influence the result. The score is the Sørensen–Dice coefficient over the
// multisets of adjacent character pairs (bigrams):
//
//	2 * |bigrams(a) ∩ bigrams(b)| / (|bigrams(a)| + |bigrams(b)|)
//
// Strings that are equal after stripping score 1. Otherwise, if either string
// has fewer than two characters it has no bigrams and the score is 0.
func Compare(a, b string) float64 {
	a = stripSpaces(a)
	b = stripSpaces(b)
	if a == b {
		return 1.0
	}

	// Work on runes rather than bytes so that every bigram is exactly two
	// characters. Slicing the string by byte offsets splits multi-byte
	// UTF-8 sequences and skews both the bigrams and the denominator.
	ra, rb := []rune(a), []rune(b)
	if len(ra) < 2 || len(rb) < 2 {
		return 0.0
	}

	type bigram [2]rune

	// Count how often each bigram occurs in a.
	firstBigrams := make(map[bigram]int, len(ra)-1)
	for i := 0; i+1 < len(ra); i++ {
		firstBigrams[bigram{ra[i], ra[i+1]}]++
	}

	// Each bigram of b consumes at most one matching occurrence from a, so
	// repeated bigrams are matched as a multiset rather than a set.
	var intersectionSize float64
	for i := 0; i+1 < len(rb); i++ {
		bg := bigram{rb[i], rb[i+1]}
		if firstBigrams[bg] > 0 {
			firstBigrams[bg]--
			intersectionSize++
		}
	}

	// A string of n characters has n-1 bigrams, hence the -2.
	return 2.0 * intersectionSize / float64(len(ra)+len(rb)-2)
}
// Match contains target string with score.
type Match struct {
	// Target is the candidate string that was compared.
	Target string
	// Score is the similarity score assigned to Target.
	Score float64
}

// MatchResult contains all matches, a best match with index.
type MatchResult struct {
	// Matches holds one entry per compared target.
	Matches []*Match
	// BestMatch points to the highest-scoring entry of Matches.
	BestMatch *Match
	// BestMatchIndex is the position of BestMatch within Matches.
	BestMatchIndex int
}

// SortedByScore sorts Matches in place by descending score (highest first)
// and sets BestMatchIndex to 0, the position of the top-scoring entry after
// sorting.
//
// The sort is stable: entries with equal scores keep their relative order.
// When BestMatch is the earliest top-scoring entry (as FindBestMatch produces
// it), it therefore ends up at index 0 and BestMatchIndex keeps referring to
// it even when several entries tie for the best score.
func (mr *MatchResult) SortedByScore() {
	sort.SliceStable(mr.Matches, func(a, b int) bool {
		return mr.Matches[a].Score > mr.Matches[b].Score
	})
	mr.BestMatchIndex = 0
}
// FindBestMatch scores s against every entry of targets using Compare.
//
// The returned MatchResult lists one Match per target, in the same order as
// targets, together with the highest-scoring Match and its index. When several
// targets share the highest score, the earliest one is reported as the best.
// An error is returned if targets is empty.
func FindBestMatch(s string, targets []string) (*MatchResult, error) {
	if len(targets) == 0 {
		return nil, fmt.Errorf("targets parameter need at least one element")
	}

	result := &MatchResult{Matches: make([]*Match, len(targets))}
	for idx, target := range targets {
		candidate := &Match{Target: target, Score: Compare(s, target)}
		result.Matches[idx] = candidate

		// The first candidate seeds the best match; later ones replace it
		// only when strictly better, so ties keep the earliest target.
		if result.BestMatch == nil || candidate.Score > result.BestMatch.Score {
			result.BestMatch = candidate
			result.BestMatchIndex = idx
		}
	}
	return result, nil
}
// stripSpaces returns str with every rune for which unicode.IsSpace reports
// true removed; all other runes are kept in their original order.
func stripSpaces(str string) string {
	var kept strings.Builder
	kept.Grow(len(str)) // the result is never longer than a valid input
	for _, r := range str {
		if unicode.IsSpace(r) {
			continue // drop whitespace
		}
		kept.WriteRune(r)
	}
	return kept.String()
}