From e82321eed8c81ff84fc405a21d26d2f3b3b91b46 Mon Sep 17 00:00:00 2001 From: database64128 Date: Thu, 22 Aug 2024 13:14:44 +0800 Subject: [PATCH] =?UTF-8?q?=E2=9C=82=EF=B8=8F=20domainset:=20optimize=20Do?= =?UTF-8?q?mainSuffixTrie?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduce indirection and simplify logic. --- .../main.go | 2 +- domainset/domainset.go | 8 +- domainset/matcher_suffix_trie.go | 153 +++++++++++------- 3 files changed, 98 insertions(+), 65 deletions(-) diff --git a/cmd/shadowsocks-go-domain-set-converter/main.go b/cmd/shadowsocks-go-domain-set-converter/main.go index 87112b6..d0e2b8f 100644 --- a/cmd/shadowsocks-go-domain-set-converter/main.go +++ b/cmd/shadowsocks-go-domain-set-converter/main.go @@ -123,7 +123,7 @@ func DomainSetBuilderFromDlc(text string) (domainset.Builder, error) { dsb := domainset.Builder{ domainset.NewDomainMapMatcher(0), - domainset.NewDomainSuffixTrie(0), + domainset.NewDomainSuffixTrieMatcherBuilder(0), domainset.NewKeywordLinearMatcher(0), domainset.NewRegexpMatcherBuilder(0), } diff --git a/domainset/domainset.go b/domainset/domainset.go index e922385..4c525c9 100644 --- a/domainset/domainset.go +++ b/domainset/domainset.go @@ -156,7 +156,7 @@ func BuilderFromGob(r io.Reader) (Builder, error) { } func BuilderFromText(text string) (Builder, error) { - return BuilderFromTextFunc(text, NewDomainMapMatcher, NewDomainSuffixTrie, NewKeywordLinearMatcher, NewRegexpMatcherBuilder) + return BuilderFromTextFunc(text, NewDomainMapMatcher, NewDomainSuffixTrieMatcherBuilder, NewKeywordLinearMatcher, NewRegexpMatcherBuilder) } func BuilderFromTextFast(text string) (Builder, error) { @@ -263,13 +263,13 @@ func ParseCapacityHint(line string) ([4]int, bool, error) { // BuilderGob is the builder's gob serialization structure. type BuilderGob struct { Domains DomainMapMatcher - Suffixes *DomainSuffixTrie + Suffixes DomainSuffixTrie Keywords KeywordLinearMatcher Regexps RegexpMatcherBuilder } func (bg BuilderGob) Builder() Builder { - return Builder{&bg.Domains, bg.Suffixes, &bg.Keywords, &bg.Regexps} + return Builder{&bg.Domains, &bg.Suffixes, &bg.Keywords, &bg.Regexps} } func (bg BuilderGob) WriteGob(w io.Writer) error { @@ -286,7 +286,7 @@ func BuilderGobFromBuilder(dsb Builder) (bg BuilderGob) { switch s := dsb.SuffixMatcherBuilder().(type) { case *DomainSuffixTrie: - bg.Suffixes = s + bg.Suffixes = *s default: bg.Suffixes = DomainSuffixTrieFromSeq(s.Rules()) } diff --git a/domainset/matcher_suffix_trie.go b/domainset/matcher_suffix_trie.go index 2660e5c..25c6c4a 100644 --- a/domainset/matcher_suffix_trie.go +++ b/domainset/matcher_suffix_trie.go @@ -2,7 +2,6 @@ package domainset import ( "iter" - "slices" ) // DomainSuffixTrie is a trie of domain parts segmented by '.'. @@ -10,14 +9,16 @@ type DomainSuffixTrie struct { // Children maps the next domain part to its child node. // // If Children is nil, the node is a leaf node. - Children map[string]*DomainSuffixTrie + Children map[string]DomainSuffixTrie } // Insert inserts a domain suffix to the trie. // Insertion purges the leaf node's children. // If say, we insert "www.google.com" and then "google.com", // The children of node "google" will be purged. -func (dst *DomainSuffixTrie) Insert(domain string) { +// +// Insert implements [MatcherBuilder.Insert]. +func (dst DomainSuffixTrie) Insert(domain string) { cdst := dst for i := len(domain) - 1; i >= 0; i-- { @@ -27,49 +28,35 @@ func (dst *DomainSuffixTrie) Insert(domain string) { part := domain[i+1:] - if cdst.Children == nil { - var ndst DomainSuffixTrie - cdst.Children = map[string]*DomainSuffixTrie{ - part: &ndst, - } - cdst = &ndst - } else { - ndst, ok := cdst.Children[part] - switch { - case !ok: - ndst = &DomainSuffixTrie{} - cdst.Children[part] = ndst - cdst = ndst - case ndst.Children == nil: - // Reached a leaf node halfway through, which means a shorter suffix - // is already present. No need to insert further. - return - default: - cdst = ndst + ndst, ok := cdst.Children[part] + switch { + case !ok: + // Insert the part as a non-leaf child node. + ndst = DomainSuffixTrie{ + Children: make(map[string]DomainSuffixTrie, 1), } + cdst.Children[part] = ndst + case ndst.Children == nil: + // Reached a leaf node halfway through, which means a shorter suffix + // is already present. No need to insert further. + return } + // Move to the next child node. + cdst = ndst + // Strip the current part from the domain. domain = domain[:i] } // Make the final (from right to left) part a leaf node. - if cdst.Children == nil { - cdst.Children = map[string]*DomainSuffixTrie{ - domain: {}, - } - } else { - ndst, ok := cdst.Children[domain] - if !ok { - cdst.Children[domain] = &DomainSuffixTrie{} - } else { - ndst.Children = nil - } - } + cdst.Children[domain] = DomainSuffixTrie{} } -// Match implements the Matcher Match method. -func (dst *DomainSuffixTrie) Match(domain string) bool { +// Match returns true if the domain matches any suffix in the trie. +// +// Match implements [Matcher.Match]. +func (dst DomainSuffixTrie) Match(domain string) bool { cdst := dst for i := len(domain) - 1; i >= 0; i-- { @@ -77,13 +64,16 @@ func (dst *DomainSuffixTrie) Match(domain string) bool { continue } - ndst, ok := cdst.Children[domain[i+1:]] + part := domain[i+1:] + + ndst, ok := cdst.Children[part] if !ok { return false } if ndst.Children == nil { return true } + cdst = ndst domain = domain[:i] } @@ -95,64 +85,107 @@ func (dst *DomainSuffixTrie) Match(domain string) bool { return ndst.Children == nil } -// Keys returns the keys of the trie. -func (dst *DomainSuffixTrie) Keys() (keys []string) { +// KeySlice returns the keys in the trie as a slice. +func (dst DomainSuffixTrie) KeySlice() (keys []string) { for s, c := range dst.Children { - keys = c.keys(s, keys) + keys = c.keySlice(s, keys) } return } -func (dst *DomainSuffixTrie) keys(suffix string, keys []string) []string { +func (dst DomainSuffixTrie) keySlice(suffix string, keys []string) []string { if dst.Children == nil { return append(keys, suffix) } for s, c := range dst.Children { - keys = c.keys(s+"."+suffix, keys) + keys = c.keySlice(s+"."+suffix, keys) } return keys } -// Rules implements the MatcherBuilder Rules method. -func (dst *DomainSuffixTrie) Rules() (int, iter.Seq[string]) { - // TODO: Implement an iterator for the trie. - keys := dst.Keys() - return len(keys), slices.Values(keys) +// KeyCount returns the number of keys in the trie. +func (dst DomainSuffixTrie) KeyCount() int { + if dst.Children == nil { + return 1 + } + var count int + for _, c := range dst.Children { + count += c.KeyCount() + } + return count +} + +// Keys returns an iterator over the keys in the trie. +func (dst DomainSuffixTrie) Keys() iter.Seq[string] { + return func(yield func(string) bool) { + for s, c := range dst.Children { + if !c.keys(s, yield) { + return + } + } + } } -// MatcherCount implements the MatcherBuilder MatcherCount method. -func (dst *DomainSuffixTrie) MatcherCount() int { +func (dst DomainSuffixTrie) keys(suffix string, yield func(string) bool) bool { if dst.Children == nil { + return yield(suffix) + } + for s, c := range dst.Children { + if !c.keys(s+"."+suffix, yield) { + return false + } + } + return true +} + +// Rules implements [MatcherBuilder.Rules]. +func (dst DomainSuffixTrie) Rules() (int, iter.Seq[string]) { + return dst.KeyCount(), dst.Keys() +} + +// MatcherCount implements [MatcherBuilder.MatcherCount]. +func (dst DomainSuffixTrie) MatcherCount() int { + if len(dst.Children) == 0 { return 0 } return 1 } -// AppendTo implements the MatcherBuilder AppendTo method. +// AppendTo implements [MatcherBuilder.AppendTo]. func (dst *DomainSuffixTrie) AppendTo(matchers []Matcher) ([]Matcher, error) { - if dst.Children == nil { + if len(dst.Children) == 0 { return matchers, nil } return append(matchers, dst), nil } -func NewDomainSuffixTrie(capacity int) MatcherBuilder { - return &DomainSuffixTrie{} +// NewDomainSuffixTrie returns a new [DomainSuffixTrie]. +func NewDomainSuffixTrie() DomainSuffixTrie { + return DomainSuffixTrie{ + Children: make(map[string]DomainSuffixTrie), + } } -func DomainSuffixTrieFromSlice(suffixes []string) *DomainSuffixTrie { - var dst DomainSuffixTrie +// NewDomainSuffixTrieMatcherBuilder returns a new [*DomainSuffixTrie] as a [MatcherBuilder]. +func NewDomainSuffixTrieMatcherBuilder(_ int) MatcherBuilder { + dst := NewDomainSuffixTrie() + return &dst +} + +// DomainSuffixTrieFromSlice creates a [DomainSuffixTrie] from a slice of suffix rules. +func DomainSuffixTrieFromSlice(suffixes []string) DomainSuffixTrie { + dst := NewDomainSuffixTrie() for _, s := range suffixes { dst.Insert(s) } - return &dst + return dst } -// DomainSuffixTrieFromSeq creates a [*DomainSuffixTrie] from a sequence of suffix rules. -func DomainSuffixTrieFromSeq(_ int, suffixSeq iter.Seq[string]) *DomainSuffixTrie { - var dst DomainSuffixTrie +// DomainSuffixTrieFromSeq creates a [DomainSuffixTrie] from a sequence of suffix rules. +func DomainSuffixTrieFromSeq(_ int, suffixSeq iter.Seq[string]) DomainSuffixTrie { + dst := NewDomainSuffixTrie() for suffix := range suffixSeq { dst.Insert(suffix) } - return &dst + return dst }