# send a query for a word to www.merriam-webster.com
from urllib.request import urlopen
from bs4 import BeautifulSoup
import json
import re
import bcolors


class merriam_webster_api:

    # www.merriam-webster.com dictionary url
    dic_url = 'https://www.merriam-webster.com/dictionary/'
    # thesaurus url
    thesa_url = 'https://www.merriam-webster.com/thesaurus/'
    # dictionary api
    API_url_dictionary = "https://www.dictionaryapi.com/api/v3/references/collegiate/json/"
    API_key_dictionary = "?key=401b3951-f31a-474e-a13e-66fc0d46de0f"
    # thesaurus api
    API_url_thesaurus = "https://www.dictionaryapi.com/api/v3/references/thesaurus/json/"
    API_key_thesaurus = "?key=c7bd68ed-a9c2-47a5-92fc-5764ed7092a4"
    # html soup for the queried page
    soup = None
    # word queried
    word = None
    # word data
    word_data = None

    def query_API(self, word):
        '''query a word through the www.merriam-webster.com API and return the
        parsed JSON response with the syntaxes, definitions, synonyms and pronunciation '''
        word = word.rstrip()
        url = self.API_url_dictionary + word + self.API_key_dictionary
        # urlopen returns a file-like object, so json.load can parse the response directly
        json_data = json.load(urlopen(url))
        return json_data

    def request_html(self, url):
        '''request a web page html '''
        try:
            html = urlopen(url)
            return html
        except Exception:
            print(f"{bcolors.FAIL}Could not connect to merriam-webster.com{bcolors.ENDC}")
            return None

    def make_html_soup(self, html):
        '''make an html soup with the given html file '''
        try:
            soup = BeautifulSoup(html, 'lxml')
            return soup
        except Exception:
            print(f"{bcolors.FAIL}Couldn't make soup out of html{bcolors.ENDC}")
            return None

    def is_word_not_found(self, soup):
        '''check whether the word was not found '''
        if soup.find_all('div', class_='words_fail_us_cont'):
            return True
        elif soup.find_all('h1', class_='missing-query'):
            return True
        else:
            return False

    def get_word(self, soup):
        '''get the word from a given html soup '''
        # get word
        try:
            self.word = soup.find('h1', class_='hword')
            return self.word.string.rstrip()
        except Exception:
            print(f"{bcolors.FAIL}Could not get word{bcolors.ENDC}")
            return None

    def abort(self):
        '''abort: print an error and return None '''
        print(f"{bcolors.FAIL}Aborting{bcolors.ENDC}")
        return None

    def get_pronunciations(self, soup):
        '''get the pronunciations for a word from a given html soup '''
        try:
            pronunciations = soup.find_all('span', class_='pr')
            # clean the children
            return [pronunciation.get_text().rstrip() for pronunciation in pronunciations]
        except Exception:
            print(f"{bcolors.WARNING}Could not get pronunciation{bcolors.ENDC}")
            return None

    def get_definitions(self, soup):
        '''get the syntaxes and definitions for a word from a given html soup '''
        definitions = {}
        try:
            # get syntaxes
            syntaxes = soup.find_all('span', class_="fl")
            definition_divs = soup.find_all('div', id=re.compile(r"dictionary-entry-[1-9]"))
            for num in range(0, len(definition_divs)):
                definitions[syntaxes[num].string] = []
                definition_tags = definition_divs[num].find_all('span', class_="dtText")
                for tag in definition_tags:
                    # clear the example tags so they do not appear in the definition
                    [example.clear() for example in tag.find_all('span', class_='ex-sent')]
                    # clear the colon tags so they do not appear in the definition
                    [colon.clear() for colon in tag.find_all('strong', class_='mw_t_bc')]
                    # save the string definitions
                    definitions[syntaxes[num].string].append([string.rstrip() for string in tag.stripped_strings][0])
            return definitions
        except Exception:
            print(f"{bcolors.FAIL}Could not get definition{bcolors.ENDC}")
            return None

    def get_synonyms(self, soup):
        '''get the synonyms from the web page '''
        synonyms = []
        try:
            synonym_labels = soup.find('div', id='synonyms-anchor').find_all('p', class_="function-label")
            for label in synonym_labels:
                if re.match('^Synonym.*$', label.string):
                    synonyms.extend([synonym_tag.string for synonym_tag in label.next_sibling.find_all('a')])
            return synonyms
        except Exception:
            print(f"{bcolors.WARNING}Could not get synonyms{bcolors.ENDC}")
            return None

    def get_antonyms(self, soup):
        '''get the antonyms from the web page '''
        antonyms = []
        try:
            antonym_labels = soup.find('div', id='synonyms-anchor').find_all('p', class_="function-label")
            for label in antonym_labels:
                if re.match('^Antonym.*$', label.string):
                    antonyms.extend([antonym_tag.string for antonym_tag in label.next_sibling.find_all('a')])
            return antonyms
        except Exception:
            print(f"{bcolors.WARNING}Could not get antonyms{bcolors.ENDC}")
            return None

    def query_webpage(self, word):
        '''query a word using the webpage '''
        self.url = self.dic_url + word
        html = self.request_html(self.url)
        if html is None:
            return None
        soup = self.make_html_soup(html)
        if soup is None:
            return None
        # get word
        word_name = self.get_word(soup)
        # get pronunciation
        pronunciations = self.get_pronunciations(soup)
        # get word syntax definitions
        definitions = self.get_definitions(soup)
        # if we were unable to get the definitions then return None
        if definitions is None:
            return None
        # get synonyms
        synonyms = self.get_synonyms(soup)
        # get antonyms
        antonyms = self.get_antonyms(soup)
        # make a dictionary with the data of the word
        return {'word': word_name,
                'pronunciation': pronunciations,
                'definitions': definitions,
                'synonyms': synonyms,
                'antonyms': antonyms}

    def query_word(self, word):
        '''query a word from merriam-webster.com and return a dict
        with the syntaxes, definitions, synonyms and pronunciation '''
        self.word = word.rstrip()
        # Try to query the word with the API
        #definition = self.query_API(word)
        # if we got a positive result
        #if definition is not None:
        #    # fix json format
        #    return definition
        # try to get the word from the webpage
        self.word_data = self.query_webpage(word)
        if self.word_data is None:
            return self.abort()
        return self.word_data
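

# Minimal usage sketch, assuming the module is run directly; the word "flower"
# is only an illustrative query, and the printed fields mirror the dict built
# by query_webpage above.
if __name__ == "__main__":
    mw = merriam_webster_api()
    data = mw.query_word("flower")
    if data is not None:
        print(data['word'])
        print(data['pronunciation'])
        for syntax, defs in data['definitions'].items():
            print(syntax)
            for definition in defs:
                print('  ' + definition)
        print('synonyms:', data['synonyms'])
        print('antonyms:', data['antonyms'])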