-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathemoji_sample.py
63 lines (53 loc) · 1.9 KB
/
emoji_sample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import emoji
import enchant
import nltk
import re
# Matches a run of one or more ASCII letters or digits; the script below uses
# it to decide whether a note contains any text.
english_ch = re.compile("[A-Za-z0-9]+")
#===================================================================================#
'''
Check whether a token contains at least one emoji character
'''
def is_emoji(word):
    """Return True when at least one character of ``word`` is an emoji.

    Each character of ``word`` is looked up individually in
    ``emoji.UNICODE_EMOJI``; the first hit makes the result True.

    NOTE(review): the shape and availability of ``emoji.UNICODE_EMOJI``
    depend on the installed version of the ``emoji`` package -- confirm
    against the version this script is run with.
    """
    return any(char in emoji.UNICODE_EMOJI for char in word)
#===================================================================================#
"""
Replace each run of digits and non-word characters with a single space
"""
def remove_special(tokens):
    """Return a new list with digit/non-word runs collapsed to one space.

    Every maximal run of digits and non-word characters inside a token is
    replaced by a single space; the tokens themselves are not dropped or
    stripped, so the result has the same length as ``tokens``.
    """
    cleaned = []
    for tok in tokens:
        cleaned.append(re.sub("(\\d|\\W)+", " ", tok))
    return cleaned
#===============================================================#
"""
Strip leading and trailing whitespace from each token
"""
def remove_blanc(tokens):
    """Return a new list with surrounding whitespace stripped from each token.

    Tokens that become empty after stripping are kept, so the result has the
    same length as ``tokens``.
    """
    stripped = []
    for tok in tokens:
        stripped.append(tok.strip())
    return stripped
#===============================================================#
# Sample notes used to exercise the classification below.
# The backslash in front of ’ is written as an explicit "\\" so the literal
# keeps its original value without relying on an invalid escape sequence.
s = [
    '😍😍😍',
    '🍕🍕',
    '🚕🚕🚕',
    '🥟🥟🥟',
    '😛🍖🍖🍖🍖🍖🍖🍗🍗🍗🍗🍗🍗',
    'I\\’m still not going but I\\’m contributing bc I love u 👯<U+200D>♀️',
    'Kamil’s baby 👶🚿',
    'One medium set. Go Hannah💪',
    '🎉',
    '🚙 vroom-vroom',
    '2/2 lmao idk how to count',
    'That favor bro 🎉💇',
    '🐔 pocket pesto',
    'a',
    'b',
    '2323',
    'thg',
    'pkl',
]

# Running totals for the three reported categories.
conlyemoji = 0
conlytext = 0
ctextemoji = 0

for note in s:
    origtokens = remove_blanc(nltk.word_tokenize(note))

    # A note is treated as containing text as soon as it has any ASCII
    # letter or digit anywhere in it.
    english = english_ch.search(note) is not None

    # Number of emoji tokens seen in a note that contains no text.
    onlyemoji = 0
    for t in origtokens:
        if not is_emoji(t):
            continue
        if english:
            # One emoji token is enough to classify a note that has text.
            print(note, "TEXT + EMOJI")
            ctextemoji += 1
            break
        onlyemoji += 1

    # Emoji-only means every token was an emoji and there was at least one.
    if onlyemoji > 0 and onlyemoji == len(origtokens):
        print(note, "ONLY EMOJI")
        conlyemoji += 1

# Whatever fell into neither category above is reported as text-only.
conlytext = len(s) - ctextemoji - conlyemoji

print("########################")
print("ONLY EMOJI", conlyemoji)
print("TEXT + EMOJI", ctextemoji)
print("ONLY TEXT", conlytext)