Skip to content

Commit

Permalink
✂️ domainset: optimize DomainSuffixTrie
Browse files Browse the repository at this point in the history
Reduce indirection and simplify logic.
  • Loading branch information
database64128 committed Aug 22, 2024
1 parent 35a2370 commit e82321e
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 65 deletions.
2 changes: 1 addition & 1 deletion cmd/shadowsocks-go-domain-set-converter/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ func DomainSetBuilderFromDlc(text string) (domainset.Builder, error) {

dsb := domainset.Builder{
domainset.NewDomainMapMatcher(0),
domainset.NewDomainSuffixTrie(0),
domainset.NewDomainSuffixTrieMatcherBuilder(0),
domainset.NewKeywordLinearMatcher(0),
domainset.NewRegexpMatcherBuilder(0),
}
Expand Down
8 changes: 4 additions & 4 deletions domainset/domainset.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ func BuilderFromGob(r io.Reader) (Builder, error) {
}

func BuilderFromText(text string) (Builder, error) {
return BuilderFromTextFunc(text, NewDomainMapMatcher, NewDomainSuffixTrie, NewKeywordLinearMatcher, NewRegexpMatcherBuilder)
return BuilderFromTextFunc(text, NewDomainMapMatcher, NewDomainSuffixTrieMatcherBuilder, NewKeywordLinearMatcher, NewRegexpMatcherBuilder)
}

func BuilderFromTextFast(text string) (Builder, error) {
Expand Down Expand Up @@ -263,13 +263,13 @@ func ParseCapacityHint(line string) ([4]int, bool, error) {
// BuilderGob is the builder's gob serialization structure.
type BuilderGob struct {
Domains DomainMapMatcher
Suffixes *DomainSuffixTrie
Suffixes DomainSuffixTrie
Keywords KeywordLinearMatcher
Regexps RegexpMatcherBuilder
}

func (bg BuilderGob) Builder() Builder {
return Builder{&bg.Domains, bg.Suffixes, &bg.Keywords, &bg.Regexps}
return Builder{&bg.Domains, &bg.Suffixes, &bg.Keywords, &bg.Regexps}
}

func (bg BuilderGob) WriteGob(w io.Writer) error {
Expand All @@ -286,7 +286,7 @@ func BuilderGobFromBuilder(dsb Builder) (bg BuilderGob) {

switch s := dsb.SuffixMatcherBuilder().(type) {
case *DomainSuffixTrie:
bg.Suffixes = s
bg.Suffixes = *s
default:
bg.Suffixes = DomainSuffixTrieFromSeq(s.Rules())
}
Expand Down
153 changes: 93 additions & 60 deletions domainset/matcher_suffix_trie.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,23 @@ package domainset

import (
"iter"
"slices"
)

// DomainSuffixTrie is a trie of domain parts segmented by '.'.
type DomainSuffixTrie struct {
// Children maps the next domain part to its child node.
//
// If Children is nil, the node is a leaf node.
Children map[string]*DomainSuffixTrie
Children map[string]DomainSuffixTrie
}

// Insert inserts a domain suffix to the trie.
// Insertion purges the leaf node's children.
// If, say, we insert "www.google.com" and then "google.com",
// the children of node "google" will be purged.
func (dst *DomainSuffixTrie) Insert(domain string) {
//
// Insert implements [MatcherBuilder.Insert].
func (dst DomainSuffixTrie) Insert(domain string) {
cdst := dst

for i := len(domain) - 1; i >= 0; i-- {
Expand All @@ -27,63 +28,52 @@ func (dst *DomainSuffixTrie) Insert(domain string) {

part := domain[i+1:]

if cdst.Children == nil {
var ndst DomainSuffixTrie
cdst.Children = map[string]*DomainSuffixTrie{
part: &ndst,
}
cdst = &ndst
} else {
ndst, ok := cdst.Children[part]
switch {
case !ok:
ndst = &DomainSuffixTrie{}
cdst.Children[part] = ndst
cdst = ndst
case ndst.Children == nil:
// Reached a leaf node halfway through, which means a shorter suffix
// is already present. No need to insert further.
return
default:
cdst = ndst
ndst, ok := cdst.Children[part]
switch {
case !ok:
// Insert the part as a non-leaf child node.
ndst = DomainSuffixTrie{
Children: make(map[string]DomainSuffixTrie, 1),
}
cdst.Children[part] = ndst
case ndst.Children == nil:
// Reached a leaf node halfway through, which means a shorter suffix
// is already present. No need to insert further.
return
}

// Move to the next child node.
cdst = ndst

// Strip the current part from the domain.
domain = domain[:i]
}

// Make the final (from right to left) part a leaf node.
if cdst.Children == nil {
cdst.Children = map[string]*DomainSuffixTrie{
domain: {},
}
} else {
ndst, ok := cdst.Children[domain]
if !ok {
cdst.Children[domain] = &DomainSuffixTrie{}
} else {
ndst.Children = nil
}
}
cdst.Children[domain] = DomainSuffixTrie{}
}

// Match implements the Matcher Match method.
func (dst *DomainSuffixTrie) Match(domain string) bool {
// Match returns true if the domain matches any suffix in the trie.
//
// Match implements [Matcher.Match].
func (dst DomainSuffixTrie) Match(domain string) bool {
cdst := dst

for i := len(domain) - 1; i >= 0; i-- {
if domain[i] != '.' {
continue
}

ndst, ok := cdst.Children[domain[i+1:]]
part := domain[i+1:]

ndst, ok := cdst.Children[part]
if !ok {
return false
}
if ndst.Children == nil {
return true
}

cdst = ndst
domain = domain[:i]
}
Expand All @@ -95,64 +85,107 @@ func (dst *DomainSuffixTrie) Match(domain string) bool {
return ndst.Children == nil
}

// Keys returns the keys of the trie.
func (dst *DomainSuffixTrie) Keys() (keys []string) {
// KeySlice returns the keys in the trie as a slice.
func (dst DomainSuffixTrie) KeySlice() (keys []string) {
for s, c := range dst.Children {
keys = c.keys(s, keys)
keys = c.keySlice(s, keys)
}
return
}

func (dst *DomainSuffixTrie) keys(suffix string, keys []string) []string {
func (dst DomainSuffixTrie) keySlice(suffix string, keys []string) []string {
if dst.Children == nil {
return append(keys, suffix)
}
for s, c := range dst.Children {
keys = c.keys(s+"."+suffix, keys)
keys = c.keySlice(s+"."+suffix, keys)
}
return keys
}

// Rules implements the MatcherBuilder Rules method.
func (dst *DomainSuffixTrie) Rules() (int, iter.Seq[string]) {
// TODO: Implement an iterator for the trie.
keys := dst.Keys()
return len(keys), slices.Values(keys)
// KeyCount returns the number of keys in the trie.
//
// A key is a path from the receiver down to a leaf node, i.e. a node whose
// Children map is nil. The receiver itself is never counted as a key, so a
// trie without children (nil or empty Children) has zero keys. This keeps
// the count in agreement with what Keys and KeySlice produce.
func (dst DomainSuffixTrie) KeyCount() int {
	var count int
	for _, c := range dst.Children {
		if c.Children == nil {
			// Leaf child: terminates exactly one key.
			count++
			continue
		}
		// Non-leaf child: count the keys in its subtree.
		count += c.KeyCount()
	}
	return count
}

// Keys returns an iterator over the keys in the trie.
//
// Each top-level child is walked in turn via keys, which yields the full
// dotted suffix for every leaf below it. Iteration follows map order, and
// stops as soon as yield returns false.
func (dst DomainSuffixTrie) Keys() iter.Seq[string] {
	return func(yield func(string) bool) {
		for part, child := range dst.Children {
			if keepGoing := child.keys(part, yield); !keepGoing {
				break
			}
		}
	}
}

// MatcherCount implements the MatcherBuilder MatcherCount method.
func (dst *DomainSuffixTrie) MatcherCount() int {
// keys walks the subtree rooted at dst and calls yield once for every leaf,
// passing the full key built by prepending each child part and a '.' to suffix.
//
// It returns false as soon as yield returns false, so the caller can stop
// iterating; otherwise it returns true.
func (dst DomainSuffixTrie) keys(suffix string, yield func(string) bool) bool {
	if dst.Children != nil {
		for part, child := range dst.Children {
			if keepGoing := child.keys(part+"."+suffix, yield); !keepGoing {
				return false
			}
		}
		return true
	}
	// Leaf node: suffix is a complete key.
	return yield(suffix)
}

// Rules implements [MatcherBuilder.Rules].
//
// It returns the result of KeyCount together with the iterator from Keys.
func (dst DomainSuffixTrie) Rules() (int, iter.Seq[string]) {
	count := dst.KeyCount()
	keys := dst.Keys()
	return count, keys
}

// MatcherCount implements [MatcherBuilder.MatcherCount].
//
// It reports 1 when the trie has at least one child and 0 otherwise.
// A nil Children map and an empty one are treated the same.
func (dst DomainSuffixTrie) MatcherCount() int {
	if len(dst.Children) > 0 {
		return 1
	}
	return 0
}

// AppendTo implements the MatcherBuilder AppendTo method.
// AppendTo implements [MatcherBuilder.AppendTo].
func (dst *DomainSuffixTrie) AppendTo(matchers []Matcher) ([]Matcher, error) {
if dst.Children == nil {
if len(dst.Children) == 0 {
return matchers, nil
}
return append(matchers, dst), nil
}

func NewDomainSuffixTrie(capacity int) MatcherBuilder {
return &DomainSuffixTrie{}
// NewDomainSuffixTrie returns a new [DomainSuffixTrie].
//
// The returned trie has a non-nil, empty Children map.
func NewDomainSuffixTrie() DomainSuffixTrie {
	var dst DomainSuffixTrie
	dst.Children = make(map[string]DomainSuffixTrie)
	return dst
}

func DomainSuffixTrieFromSlice(suffixes []string) *DomainSuffixTrie {
var dst DomainSuffixTrie
// NewDomainSuffixTrieMatcherBuilder returns a new [*DomainSuffixTrie] as a [MatcherBuilder].
//
// The int argument is ignored; it is present only so the function matches
// the constructor signature used for the other matcher builders.
func NewDomainSuffixTrieMatcherBuilder(_ int) MatcherBuilder {
	dst := new(DomainSuffixTrie)
	*dst = NewDomainSuffixTrie()
	return dst
}

// DomainSuffixTrieFromSlice creates a [DomainSuffixTrie] from a slice of suffix rules.
func DomainSuffixTrieFromSlice(suffixes []string) DomainSuffixTrie {
dst := NewDomainSuffixTrie()
for _, s := range suffixes {
dst.Insert(s)
}
return &dst
return dst
}

// DomainSuffixTrieFromSeq creates a [*DomainSuffixTrie] from a sequence of suffix rules.
func DomainSuffixTrieFromSeq(_ int, suffixSeq iter.Seq[string]) *DomainSuffixTrie {
var dst DomainSuffixTrie
// DomainSuffixTrieFromSeq creates a [DomainSuffixTrie] from a sequence of suffix rules.
func DomainSuffixTrieFromSeq(_ int, suffixSeq iter.Seq[string]) DomainSuffixTrie {
dst := NewDomainSuffixTrie()
for suffix := range suffixSeq {
dst.Insert(suffix)
}
return &dst
return dst
}

0 comments on commit e82321e

Please sign in to comment.