From d0963bd68a75dfbc42a6d5cd12f18da5f367fb0b Mon Sep 17 00:00:00 2001 From: lyric Date: Wed, 8 Jun 2016 11:50:07 +0800 Subject: [PATCH] Add replace function --- filter.go | 5 ++ nodefilter.go | 60 +++++++++++++++++++++ nodefilter_test.go | 11 ++++ sample/{memory/main.go => memory_sample.go} | 2 +- wercker.yml | 29 ---------- 5 files changed, 77 insertions(+), 30 deletions(-) rename sample/{memory/main.go => memory_sample.go} (81%) delete mode 100644 wercker.yml diff --git a/filter.go b/filter.go index 8fe2ac1..9301e81 100644 --- a/filter.go +++ b/filter.go @@ -29,4 +29,9 @@ type DirtyFilter interface { // 返回可读流中出现的敏感词及出现次数,如果敏感词不存在则返回nil // 如果出现异常,则返回error FilterReaderResult(reader io.Reader, excludes ...rune) (map[string]int, error) + + // Replace 使用字符替换文本中的敏感词 + // delim 替换的字符 + // 如果出现异常,则返回error + Replace(text string, delim rune) (string, error) } diff --git a/nodefilter.go b/nodefilter.go index 0f7b1cb..2d22ec3 100644 --- a/nodefilter.go +++ b/nodefilter.go @@ -139,6 +139,15 @@ func (nf *nodeFilter) FilterReaderResult(reader io.Reader, excludes ...rune) (ma return data, nil } +func (nf *nodeFilter) Replace(text string, delim rune) (string, error) { + uchars := []rune(text) + idexs := nf.doIndexes(uchars) + for i := 0; i < len(idexs); i++ { + uchars[idexs[i]] = rune(delim) + } + return string(uchars), nil +} + func (nf *nodeFilter) checkExclude(u rune, excludes ...rune) bool { if len(excludes) == 0 { return false @@ -188,3 +197,54 @@ func (nf *nodeFilter) doFilter(uchars []rune, data map[string]int) { data[result[i]] = c + 1 } } + +func (nf *nodeFilter) doIndexes(uchars []rune) (idexs []int) { + var ( + tIdexs []int + ul = len(uchars) + n = nf.root + ) + for i := 0; i < ul; i++ { + if _, ok := n.child[uchars[i]]; !ok { + continue + } + n = n.child[uchars[i]] + tIdexs = append(tIdexs, i) + if n.end { + idexs = nf.appendTo(idexs, tIdexs) + tIdexs = nil + } + for j := i + 1; j < ul; j++ { + if _, ok := n.child[uchars[j]]; !ok { + break + } + n = n.child[uchars[j]] + tIdexs = append(tIdexs, j) + if n.end { + idexs = nf.appendTo(idexs, tIdexs) + } + } + if tIdexs != nil { + tIdexs = nil + } + n = nf.root + } + return +} + +func (nf *nodeFilter) appendTo(dst, src []int) []int { + var t []int + for i, il := 0, len(src); i < il; i++ { + var exist bool + for j, jl := 0, len(dst); j < jl; j++ { + if src[i] == dst[j] { + exist = true + break + } + } + if !exist { + t = append(t, src[i]) + } + } + return append(dst, t...) +} diff --git a/nodefilter_test.go b/nodefilter_test.go index 2105ff0..8802036 100644 --- a/nodefilter_test.go +++ b/nodefilter_test.go @@ -2,6 +2,7 @@ package filter_test import ( "bytes" + "strings" "github.com/antlinker/go-dirtyfilter" . "github.com/onsi/ginkgo" @@ -73,4 +74,14 @@ var _ = Describe("使用节点过滤器过滤敏感词数据", func() { Expect(result).To(Equal(map[string]int{"陈水扁": 1})) }) + It("替换文本中的敏感词数据", func() { + nodeFilter = filter.NewNodeFilter([]string{"共产主义"}) + data, err := nodeFilter.Replace(filterText, '*') + if err != nil { + Fail(err.Error()) + return + } + Expect(data).To(Equal(strings.Replace(filterText, "共产主义", "****", 1))) + }) + }) diff --git a/sample/memory/main.go b/sample/memory_sample.go similarity index 81% rename from sample/memory/main.go rename to sample/memory_sample.go index dddb36a..f193886 100644 --- a/sample/memory/main.go +++ b/sample/memory_sample.go @@ -8,7 +8,7 @@ import ( ) var ( - filterText = `我是需要过滤的内容,内容为:**文@@件,需要过滤。。。` + filterText = `我是需要过滤的内容,内容为:**文*@@件**名,需要过滤。。。` ) func main() { diff --git a/wercker.yml b/wercker.yml deleted file mode 100644 index 71bad14..0000000 --- a/wercker.yml +++ /dev/null @@ -1,29 +0,0 @@ -box: golang - -# Build definition -build: - # The steps that will be executed on build - steps: - - # Sets the go workspace and places you package - # at the right place in the workspace tree - - setup-go-workspace - - # golint step! - - wercker/golint - - script: - name: go get - code: | - go get -t ./... - - # Build the project - - script: - name: go build - code: | - go build ./... - - # Test the project - - script: - name: go test - code: | - go test ./... \ No newline at end of file