This repository has been archived by the owner on Nov 21, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
limit.py
54 lines (46 loc) · 1.75 KB
/
limit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import unicodedata
# import ahocorasick
from .config import get_config
# Banned words consulted by guolv(); loaded once, at import time, from the
# 'ban_word' / 'wordlist' configuration entry.
# NOTE(review): presumably a list of lower-case words -- confirm against what
# get_config actually returns, since guolv() lower-cases its input before matching.
wordlist = get_config('ban_word', 'wordlist')
def guolv(sent: str = "", banned_words=None):
    """Split a comma-separated tag string and filter out banned tags.

    The input is NFKC-normalised (so full-width punctuation such as the
    Chinese comma becomes its ASCII form), lower-cased, and split on commas.
    Each tag is stripped of surrounding whitespace. A tag is considered
    banned when any of its space-separated tokens is found in the banned
    word collection; the whole tag is then moved to the banned output.

    Args:
        sent: Comma-separated tags.
        banned_words: Optional collection tested with ``in``. When ``None``
            (the default) the module-level ``wordlist`` is used.

    Returns:
        A ``(kept, banned)`` tuple of comma-joined strings: the tags that
        passed the filter and the tags that were removed, both in their
        original order.
    """
    words = wordlist if banned_words is None else banned_words

    normalized = unicodedata.normalize('NFKC', sent).lower()
    # Lower-casing also hit the values of the "&shape=" parameter; restore
    # the capitalised spelling for the three known shapes.
    for shape in ('Portrait', 'Landscape', 'Square'):
        normalized = normalized.replace('&shape=' + shape.lower(), '&shape=' + shape)

    kept = []
    banned = []
    for tag in (part.strip() for part in normalized.split(",")):
        # any() flags the tag at most once, even when several of its tokens
        # are banned; flagging it once per token used to make the later
        # list.remove() call raise ValueError.
        if any(token.strip() in words for token in tag.split(" ")):
            banned.append(tag)
        else:
            kept.append(tag)

    return ",".join(kept), ",".join(banned)
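# --- Disabled alternative implementation (kept for reference) ---
# Uses an Aho-Corasick automaton (the `ahocorasick` import above is likewise
# commented out) to find banned words and strip them out of the string.
# def build_actree(wordlist):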
# actree = ahocorasick.Automaton()
# for index, word in enumerate(wordlist):
# actree.add_word(word, (index, word))
# actree.make_automaton()
# return actree
# def guolv(sent):
# words = wordlist
# actree = build_actree(wordlist=words)
# sent_cp = sent.lower() # convert to lowercase
# tags_guolu = ""
# for i in actree.iter(sent):
# sent_cp = sent_cp.replace(i[1][1], "")
# tags_guolu += str(i[1][1]) + " "
# sent_cp = sent_cp.replace("landscape", "Landscape")
# sent_cp = sent_cp.replace("portrait", "Portrait")
# sent_cp = sent_cp.replace("square", "Square")
# return sent_cp,tags_guolu