Skip to content

Commit

Permalink
Moderation improvements (#564)
Browse files Browse the repository at this point in the history
  • Loading branch information
duogenesis authored Dec 21, 2024
1 parent df208a8 commit e59b187
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 17 deletions.
2 changes: 2 additions & 0 deletions banned-club.sql
Original file line number Diff line number Diff line change
Expand Up @@ -800,6 +800,7 @@ VALUES
('fruits as a metaphor for sex'),
('fuck'),
('fuck 12'),
('fuckable'),
('fuck acab'),
('fuckaduck'),
('fuck any colors'),
Expand Down Expand Up @@ -2287,6 +2288,7 @@ VALUES
('rag heads'),
('ragheads'),
('rape'),
('rapeable'),
('rapebait'),
('rapebastard club'),
('rapebastardclub'),
Expand Down
1 change: 1 addition & 0 deletions chat.requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
confusable-homoglyphs
lxml
nltk
psycopg[binary]
Expand Down
2 changes: 1 addition & 1 deletion service/chat/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@

PORT = sys.argv[1] if len(sys.argv) >= 2 else 5443

MAX_INTROS_PER_DAY = 50
MAX_INTROS_PER_DAY = 35

# TODO: Tables to migrate to monolithic DB:
#
Expand Down
110 changes: 106 additions & 4 deletions service/chat/offensive/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from confusable_homoglyphs import confusables
import re
import unicodedata

Expand Down Expand Up @@ -36,6 +37,8 @@
"fvck": "fuck",
"fvcked": "fucked",
"fvcking": "fucking",
"ngger": "nigger",
"nggr": "nigger",
"nggr": "nigger",
"p0rn": "porn",
"pissin": "pissing",
Expand All @@ -54,6 +57,7 @@
"anal",
"anally",
"anus",
"around your throat",
"ass fuck",
"ass fucked",
"ass fucker",
Expand All @@ -66,7 +70,10 @@
"assfucked",
"assfucker",
"assfucking",
"back shots",
"backshots",
"ballsack",
"bash your",
"beastial",
"beastiality",
"bellend",
Expand All @@ -91,6 +98,7 @@
"butt-hole",
"butthole",
"buttstuff",
"bwc",
"carpet muncher",
"cawk",
"cervix",
Expand Down Expand Up @@ -131,13 +139,27 @@
"cuntlicking",
"cunts",
"cut me",
"cut my wrist",
"cut my wrists",
"cut myself",
"cut you",
"cut your wrist",
"cut your wrists",
"cut yourself",
"cutting myself",
"cutting yourself",
"deep throat",
"deep throated",
"deepthroat",
"dick",
"dildo",
"dildos",
"do it raw",
"dog-fucker",
"doggin",
"dogging",
"down my throat",
"down your throat",
"dyke",
"dykes",
"e sex",
Expand All @@ -154,6 +176,7 @@
"ejaculatings",
"ejaculation",
"ejakulate",
"end your life",
"esex",
"faggitt",
"faggot",
Expand Down Expand Up @@ -186,8 +209,10 @@
"fuck your face",
"fuck your mouth",
"fuck your thighs",
"fuck your throat",
"fuck your tits",
"fuck yourself",
"fuckable",
"fucking me",
"fucking you",
"gag me",
Expand All @@ -214,6 +239,7 @@
"hanging myself",
"hanging yourself",
"heil",
"hit it raw",
"hoe",
"hoes",
"horniest",
Expand All @@ -222,6 +248,8 @@
"incest",
"jack-off",
"jackoff",
"jeet",
"jeets",
"jerk off",
"jerk-off",
"jerked off",
Expand All @@ -245,6 +273,8 @@
"labia",
"lick my",
"lick your",
"like it raw",
"likes it raw",
"loli",
"lolicon",
"masterbate",
Expand All @@ -259,6 +289,7 @@
"molester",
"molesting",
"my nuts",
"my throat",
"necrophilia",
"nigga",
"niggas",
Expand All @@ -273,6 +304,12 @@
"orgy",
"paedo",
"paedophile",
"pajeet",
"pajeeta",
"pajeetas",
"pajeets",
"paki",
"pakis",
"paraphilias",
"pedo",
"pedophile",
Expand All @@ -281,9 +318,15 @@
"penis",
"penisfucker",
"phonesex",
"pin you",
"piss",
"pissflaps",
"pissin",
"pissing",
"poojeet",
"poojeeta",
"poojeetas",
"poojeets",
"porn",
"porno",
"pornography",
Expand All @@ -294,11 +337,13 @@
"pussys",
"rail you",
"rape",
"rapeable",
"rapebait",
"raped",
"rapes",
"raping",
"rapist",
"raw dog you",
"retard",
"retardation",
"retarded",
Expand All @@ -319,20 +364,40 @@
"shota",
"shotacon",
"skank",
"slit my wrist",
"slit my wrists",
"slit your wrist",
"slit your wrists",
"slut",
"sluts",
"slutty",
"smegma",
"sodomize",
"sodomy",
"some head",
"spic",
"spit in my face",
"spit in my mouth",
"spit in your face",
"spit in your mouth",
"spit on me",
"spit on my face",
"spit on my mouth",
"spit on you",
"spit on your face",
"spit on your mouth",
"stabbing me",
"stabbing you",
"strangle me",
"strangle you",
"suicidal",
"suicide",
"testicle",
"throat fuck",
"throat fucking",
"throat pussy",
"throatfuck",
"throatfucking",
"tie me",
"tie you",
"tit fuck",
Expand Down Expand Up @@ -376,9 +441,13 @@
"whore",
"whores",
"wincest",
"you are retarded",
"you retard",
"you retarded",
"you will never be a woman",
"you'll never be a woman",
"your throat",
"your throat",
"ywnbaw",
"zoophilia",
]
Expand All @@ -390,17 +459,47 @@


# Characters which were repeated more than once
_repeated_characters_pattern = re.compile(r'(.)\1+')
_repeated_characters_pattern = re.compile(r'(.)\1+', re.IGNORECASE)


_offensive_matcher = re.compile(_offensive_pattern, re.IGNORECASE)


def _apply_normalization_map(haystack: str):
def _get_latin_homoglyph(char: str) -> str:
    """
    Map a single character onto a look-alike character, when one is known.

    The character is looked up with `confusables.is_confusable`, asking for
    'latin' as the preferred alias. The first homoglyph of the first usable
    entry is returned; when no entry yields one, `char` is returned unchanged.
    """
    entries = confusables.is_confusable(char, preferred_aliases=['latin'])
    if not entries:
        # A falsy result means there is nothing to substitute
        return char

    for entry in entries:
        # Entries whose own alias is LATIN or COMMON never supply a
        # replacement.
        # NOTE(review): this tests the alias stored on the entry, not the
        # alias of each homoglyph - confirm the first homoglyph listed is
        # always the Latin one.
        if entry.get('alias') in ['LATIN', 'COMMON']:
            continue

        for candidate in entry.get('homoglyphs', []):
            replacement = candidate['c']
            if replacement:
                return replacement
            # Only the first listed homoglyph of an entry is ever considered
            break

    return char


def _normalize_homoglyphs(s: str) -> str:
    """
    Rebuild `s` with every character passed through `_get_latin_homoglyph`.
    """
    replaced = map(_get_latin_homoglyph, s)
    return ''.join(replaced)


def _normalize_spelling(haystack: str):
for needle, replacement in _normalization_map.items():
# Apparently compiled regexes are cached between invocations of
# re.compile.
pattern = re.compile(f"\\b{needle}\\b")
pattern = re.compile(f"\\b{needle}\\b", re.IGNORECASE)

haystack = pattern.sub(replacement, haystack)

Expand All @@ -415,6 +514,9 @@ def normalize_string(s: str):
char for char in normalized_input if not unicodedata.combining(char)
)

# Mitigate homoglyph attacks
normalized_input = _normalize_homoglyphs(normalized_input)

# Normalize whitespace
normalized_input = ' '.join(_split_pattern.split(normalized_input))

Expand All @@ -423,7 +525,7 @@ def normalize_string(s: str):
r'\1\1', normalized_input)

# Replace slang
normalized_input = _apply_normalization_map(normalized_input)
normalized_input = _normalize_spelling(normalized_input)

return normalized_input

Expand Down
31 changes: 31 additions & 0 deletions service/chat/offensive/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ def test_offensive_strings(self):
self.assertTrue(
is_offensive("go fk urself please"))

self.assertTrue(
is_offensive("GO FK URSELF PLEASE"))

self.assertTrue(
is_offensive("I want to cvm on u"))

Expand All @@ -25,6 +28,19 @@ def test_offensive_strings(self):
self.assertTrue(
is_offensive("Would you like to purchase viagra?"))

# whore is offensive even if followed by an emoji without space between
self.assertTrue(
is_offensive("you are a whore🙂"))

self.assertTrue(
is_offensive("you are a whore!"))

# Homoglyph attacks. The letter "o" in "whоre" has been replaced with a
# Cyrillic "о" (Unicode character U+043E), which looks identical to the
# Latin "o" (Unicode character U+006F)
self.assertTrue(
is_offensive("you filthy degеnerate pоrn addicted whоre !"))

self.assertTrue(
is_offensive("I am the rapist"))

Expand All @@ -47,6 +63,21 @@ def test_inoffensive_strings(self):
self.assertFalse(
is_offensive("I am therapist"))

# Using Cyrillic characters
def test_cyrillic_substitution(self):
    # Each sample swaps Latin letters for Cyrillic look-alikes and is
    # expected to be reported as offensive.
    samples = (
        # "a" replaced with Cyrillic "а" (U+0430)
        "gаg me",
        # "e" replaced with Cyrillic "е" (U+0435)
        "gave mе head",
        # Latin letters mixed with both Cyrillic substitutions
        "fuckаblе",
    )
    for sample in samples:
        self.assertTrue(is_offensive(sample))

# Using Greek characters
def test_greek_substitution(self):
    # Each sample swaps a Latin letter for a Greek look-alike and is
    # expected to be reported as offensive.
    samples = (
        # "o" replaced with Greek "ο" (U+03BF)
        "gοok",
        # "i" replaced with Greek "ι" (U+03B9)
        "gιmme head",
    )
    for sample in samples:
        self.assertTrue(is_offensive(sample))


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit e59b187

Please sign in to comment.