-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathunicode.go
62 lines (54 loc) · 1.8 KB
/
unicode.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
// Copyright (c) 2013 Couchbase, Inc.
// +build ignore
package collatejson
//import "code.google.com/p/go.text/collate"
//import "code.google.com/p/go.text/collate/colltab"
//import "code.google.com/p/go.text/language"
//import "code.google.com/p/go.text/unicode/norm"
// UnicodeCollationPriority stores the collation options on the codec.
// EncodeUnicodeString later copies them onto the collate.Collator it builds
// when neither plain UTF8 nor NFKD sorting is enabled.
//
// The arguments map one-to-one onto the Collator fields Strength, Alternate,
// Backwards, HiraganaQuaternary, CaseLevel and Numeric.
func (codec *Codec) UnicodeCollationPriority(
	strength colltab.Level, alternate collate.AlternateHandling,
	backwards, hiraganaQ, caseLevel, numeric bool) {

	codec.strength, codec.alternate = strength, alternate
	codec.backwards, codec.hiraganaQ = backwards, hiraganaQ
	codec.caseLevel, codec.numeric = caseLevel, numeric
}
// SetLanguage records the language tag on the codec. EncodeUnicodeString
// passes this tag to collate.New when it falls back to collator-based
// encoding.
func (codec *Codec) SetLanguage(l language.Tag) {
	codec.language = l
}
// EncodeUnicodeString converts value into the byte sequence used to order
// strings. The codec's settings select the encoding, checked in this order:
//
//   - utf8: the bytes of value are returned as they are;
//   - nfkd: the bytes are normalized with norm.NFKD;
//   - otherwise: a collation key is produced by a collate.Collator created
//     for the codec's language and configured with the options recorded by
//     UnicodeCollationPriority.
func (codec *Codec) EncodeUnicodeString(value string) (code []byte) {
	raw := []byte(value)

	switch {
	case codec.utf8:
		return raw
	case codec.nfkd:
		return norm.NFKD.Bytes(raw)
	}

	// A new Collator is built on every call and configured from the codec.
	collator := collate.New(codec.language)
	collator.Strength = codec.strength
	collator.Alternate = codec.alternate
	collator.Backwards = codec.backwards
	collator.HiraganaQuaternary = codec.hiraganaQ
	collator.CaseLevel = codec.caseLevel
	collator.Numeric = codec.numeric

	// TODO: Try to understand the performance implication of allocating a
	// fresh collate.Buffer for each key.
	return collator.Key(&collate.Buffer{}, raw)
}
// SortbyNFKD switches NFKD-based encoding of strings on or off. When enabled,
// EncodeUnicodeString normalizes its input with norm.NFKD instead of building
// a collation key. The setting only takes effect while plain UTF8 sorting
// (see SortbyUTF8) is disabled, because that flag is checked first.
func (codec *Codec) SortbyNFKD(what bool) {
	codec.nfkd = what
}
// SortbyUTF8 switches plain binary comparison of strings on or off. When
// enabled, EncodeUnicodeString returns the bytes of its input unchanged; this
// flag is checked before the NFKD and collator-based encodings.
func (codec *Codec) SortbyUTF8(what bool) {
	codec.utf8 = what
}