Skip to content

Commit

Permalink
Tune the gibberish detector
Browse files (browse the repository at this point in the history)
  • Loading branch information
duogenesis committed Dec 18, 2024
1 parent ddda2fe commit cad2b69
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 6 deletions.
2 changes: 1 addition & 1 deletion service/chat/spam/gibberishdetector/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,7 @@ def string_probability(s, bigram_probs):
return prob


-def contains_gibberish(text, window_size=10, prob_threshold=-200):
+def contains_gibberish(text, window_size=10, prob_threshold=-50):
"""Detect unlikely text based on bigram probabilities"""
if len(text) == 0:
return False
Expand Down
25 changes: 20 additions & 5 deletions service/chat/spam/gibberishdetector/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,24 @@ def test_gibberish_strings(self):
contains_gibberish(
"Hello how are you doing Text me on telegram,, @laura_rosaline_klein or. add my Zangi private number 1061353927"))

self.assertTrue(
contains_gibberish(
"""EPSSVSMMKVIPGFJR
Alex meow """))

self.assertTrue(
contains_gibberish(
"""WIGLXVAQTLRIIKQT
Takakura ken meow """))

self.assertTrue(
contains_gibberish(
"""KUNBJDRUBDKRSWUY
Morgan meow """))


def test_non_gibberish_strings(self):
self.assertFalse(
Expand All @@ -56,7 +74,7 @@ def test_non_gibberish_strings(self):

self.assertFalse(
contains_gibberish(
-"Online dating, but based and true love-pilled 💕"))
+"Online dating, but based and true love-pilled"))

self.assertFalse(
contains_gibberish(
Expand All @@ -77,10 +95,7 @@ def test_non_gibberish_strings(self):
""".strip()))

self.assertFalse(
-contains_gibberish(
-"""
-在一个阳光明媚的早晨,小兔子跳跳发现了一朵开得特别美的向日葵。它忍不住凑近闻了闻,结果一只小蜜蜂从花瓣后面探出了脑袋,吓了跳跳一跳。“别怕,我只是采蜜的小蜜蜂!”蜜蜂笑着说。
-""".strip()))
+contains_gibberish("在一个阳光明媚的早晨。"))

self.assertFalse(
contains_gibberish("Ich finde dich süß. Hast du einen Freund?"))
Expand Down

0 comments on commit cad2b69

Please sign in to comment.