-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwords_master.py
64 lines (53 loc) · 2.3 KB
/
words_master.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
from googletrans import Translator
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
from random import randint
import nltk.data
# Shared googletrans client; translate_text() sends every request through it.
translator = Translator()
def translate_text(text, to_language):
    """Return *text* translated into *to_language*.

    The request goes through the module-level ``translator``; only the
    destination language is passed along, and the ``text`` attribute of the
    returned translation object is handed back to the caller.
    """
    return translator.translate(text, dest=to_language).text
class words_master():
    """Reword a text by swapping words for WordNet synonyms via English.

    The input is translated to English, eligible words are replaced by a
    randomly chosen WordNet entry of the same part of speech, and the result
    is translated to Romanian ('ro').
    """

    # WordNet part-of-speech letters for the Penn Treebank tags replaced by
    # default. Adjectives are 'a' (plus 's' for satellite adjectives) in
    # WordNet, so the tag's first letter ('j') would never match.
    _WORDNET_POS = {'JJ': ('a', 's'), 'NN': ('n',), 'VB': ('v',)}

    def __init__(self):
        # Kept as a public attribute for backward compatibility; change_words
        # itself only needs word-level tokenization.
        self.tokenizer = nltk.data.load('tokenizers/punkt/english.pickle')
        # Only these exact POS tags are candidates for replacement.
        self.valid_replacements = ['JJ', 'NN', 'VB']

    def _wordnet_pos_for(self, tag):
        """Return the WordNet POS letters that correspond to POS tag *tag*."""
        # Tags a caller added to valid_replacements fall back to the tag's
        # first letter, which is right for e.g. 'RB' -> 'r'.
        return self._WORDNET_POS.get(tag, (tag[0].lower(),))

    def _find_replacements(self, word, tag):
        """Return candidate replacements for *word*, or [] if none apply."""
        if tag not in self.valid_replacements:
            return []
        wanted_pos = self._wordnet_pos_for(tag)
        replacements = []
        for syn in wordnet.synsets(word):
            # Synset names look like '<lemma>.<pos>.<number>'; split from the
            # right so a lemma that itself contains dots stays intact.
            parts = syn.name().rsplit('.', 2)
            if len(parts) != 3:
                continue
            lemma, pos, _ = parts
            # Only replace nouns with nouns, verbs with verbs etc.
            if pos in wanted_pos:
                # WordNet joins multi-word lemmas with underscores.
                replacements.append(lemma.replace('_', ' '))
        return replacements

    def change_words(self, text_block):
        """Return *text_block* reworded with synonyms, translated to 'ro'.

        Each intermediate text (input, English translation, reworded English,
        final translation) is printed. Words with no suitable replacement are
        kept unchanged.
        """
        print(text_block)
        en_text = translate_text(text_block, 'en')
        print(en_text)
        words_text = word_tokenize(en_text)
        tagged_text = nltk.pos_tag(words_text)
        output_words = []
        for word, tag in tagged_text:
            replacements = self._find_replacements(word, tag)
            if replacements:
                # Choose a random replacement
                output_words.append(
                    replacements[randint(0, len(replacements) - 1)])
            else:
                # If no replacement could be found, keep the original word
                output_words.append(word)
        output_text = " ".join(output_words)
        print(output_text)
        ro_text = translate_text(output_text, 'ro')
        print(ro_text)
        return ro_text

    def change_words_simplified(self, text_block):
        """Return *text_block* after a round trip through 'en' back to 'ro'.

        No synonym replacement is done; the input and the result are printed.
        """
        print(text_block)
        slight_text = translate_text(translate_text(text_block, 'en'), 'ro')
        print(slight_text)
        return slight_text
#text_in = "Dl. Vlad nu este prea fericit cu situatia actuala a pietei."
#wm = words_master()
#wm.change_words(text_in)
#wm.change_words_simplified(text_in)