-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhelperfuncs.py
65 lines (54 loc) · 1.95 KB
/
helperfuncs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
class TextFormatting:
    """Hold a piece of text and normalise it in place.

    Every method except ``SearchStringCompatibility`` mutates ``self.text``
    (or ``self.unique_dict``) and returns ``None``.

    Attributes are documented inline in ``__init__``.
    """

    # Tokens dropped by RemoveStopWords unless the caller omits them.
    _DEFAULT_STOP_WORDS = (
        'the', 'a', 'and', 'is', 'be', 'will', 'was', 'etc', 'an', 'https',
    )
    # Characters deleted by RemoveTagsAndPunctuations unless omitted.
    _DEFAULT_PUNCTUATIONS = r'''!()-[]{};:'"\,<>./?@#$%^&*_~'''

    def __init__(self, text: str = ""):
        """Store *text* and start with an empty word-to-id mapping."""
        # The working text; rewritten by the formatting methods.
        self.text = text
        # word -> integer id, filled by CreateUniqueid().
        self.unique_dict = {}

    def RemoveStopWords(self, stopwords=None, omit=None) -> None:
        """Drop stop words from ``self.text``.

        The text is split on single spaces and every token that exactly
        matches a stop word (case-sensitive) is removed. The remaining tokens
        are re-joined with single spaces and leading spaces are stripped.

        Args:
            stopwords: Optional iterable of extra words to treat as stop
                words, in addition to the built-in defaults.
            omit: Optional iterable of words that must NOT be treated as stop
                words. Words that are not currently stop words are ignored
                (previously this raised ``ValueError``).
        """
        stop_words = set(self._DEFAULT_STOP_WORDS)
        if stopwords is not None:
            stop_words.update(stopwords)
        if omit is not None:
            # difference_update tolerates words that are not in the set.
            stop_words.difference_update(omit)

        kept = [word for word in self.text.split(" ") if word not in stop_words]
        self.text = " ".join(kept)
        # Empty tokens from leading spaces survive the join; strip them.
        self.TextTruncate()

    def RemoveTagsAndPunctuations(self, punctuation=None, omit=None) -> None:
        """Delete punctuation characters from ``self.text``.

        Only individual characters are removed (including ``<`` and ``>``);
        the text between angle brackets is left in place.

        Args:
            punctuation: Optional iterable of strings whose characters are
                deleted in addition to the built-in defaults.
            omit: Optional iterable of strings to remove from the deletion
                set, so those characters are kept in the text.
        """
        punctuations = self._DEFAULT_PUNCTUATIONS
        if punctuation is not None:
            punctuations += "".join(punctuation)
        if omit is not None:
            for item in omit:
                punctuations = punctuations.replace(item, "")

        # One pass over the text instead of one str.replace per character.
        self.text = self.text.translate(str.maketrans("", "", punctuations))

    def ToLower(self) -> None:
        """Lower-case ``self.text`` in place."""
        self.text = self.text.lower()

    def CreateUniqueid(self) -> None:
        """Rebuild ``self.unique_dict`` from the current text.

        The text is lower-cased (``self.text`` is modified), split on single
        spaces, and each distinct token is assigned its index in sorted
        order. The mapping is rebuilt from scratch on every call so ids from
        a previous text cannot linger or collide with the new ones.
        """
        self.ToLower()
        vocabulary = sorted(set(self.text.split(" ")))
        self.unique_dict = {word: index for index, word in enumerate(vocabulary)}

    def TextTruncate(self) -> None:
        """Strip leading space characters (only ``" "``) from ``self.text``."""
        self.text = self.text.lstrip(" ")

    def SearchStringCompatibility(self, KEYWORD: str) -> str:
        """Return *KEYWORD* with its whitespace-separated words joined by ``+``.

        Leading, trailing and repeated whitespace is collapsed; an empty or
        all-whitespace keyword yields ``""``. No URL escaping is performed.
        """
        return "+".join(KEYWORD.split())