-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathweb_scraper.py
61 lines (45 loc) · 1.76 KB
/
web_scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from bs4 import BeautifulSoup
import requests
import re
from urllib.parse import urlparse
def ebay_scraper(url, document):
    """Extract the listing title and US-dollar price from an eBay item page.

    Args:
        url: Address of the listing. Not used by the parser itself; kept so
            both site scrapers can be called with the same arguments.
        document: Parsed page exposing a BeautifulSoup-style ``select``
            method whose results carry a ``text`` attribute.

    Returns:
        A ``(title, words, true_price)`` tuple: the shortened title, the
        price digits as displayed (thousands separators kept, currency
        marker removed), and the same amount as a ``float``.

    Raises:
        ValueError: If the title or price element is missing, or the price
            text does not contain an amount introduced by ``"US "``.
    """
    title_nodes = document.select("h1.x-item-title__mainTitle")
    price_nodes = document.select("div.x-price-primary")
    if not title_nodes or not price_nodes:
        raise ValueError("eBay page is missing the title or price element")

    title = title_nodes[0].text.strip()
    # Keep only the part of the title before the first " - " or spec keyword.
    title = re.split(" - | AMD| Intel| Geforce| Nvidia| Windows", title)[0]

    price_text = price_nodes[0].text.strip()
    # Only amounts introduced by "US " are accepted, so a price shown in
    # another currency is rejected instead of being reported as dollars.
    _, marker, after_marker = price_text.partition("US ")
    amount = re.search(r"\d+(?:,\d+)*(?:\.\d+)?", after_marker) if marker else None
    if amount is None:
        raise ValueError(f"Unrecognised eBay price text: {price_text!r}")

    # Taking just the matched number drops suffixes such as "/ea".
    words = amount.group(0)
    true_price = float(words.replace(",", ""))
    return title, words, true_price
def newegg_scraper(url, document):
    """Extract the product title and price from a Newegg product page.

    Args:
        url: Address of the product page. Not used by the parser itself;
            kept so both site scrapers can be called with the same arguments.
        document: Parsed page exposing a BeautifulSoup-style ``select``
            method whose results carry a ``text`` attribute.

    Returns:
        A ``(title, words, true_price)`` tuple: the shortened title, the
        price digits as displayed (thousands separators kept, ``$``
        removed), and the same amount as a ``float``.

    Raises:
        ValueError: If the title or price element is missing, or the price
            element contains no number.
    """
    title_nodes = document.select("h1.product-title")
    price_nodes = document.select("li.price-current")
    if not title_nodes or not price_nodes:
        raise ValueError("Newegg page is missing the title or price element")

    title = title_nodes[0].text.strip()
    # Keep only the part of the title before the first " - " or spec keyword.
    title = re.split(" - | AMD| Intel| Geforce| Nvidia| Windows", title)[0]

    price_text = price_nodes[0].text.strip()
    # Pull out the first number so surrounding whitespace, the "$" sign and
    # any trailing decoration do not reach float().
    amount = re.search(r"\d+(?:,\d+)*(?:\.\d+)?", price_text)
    if amount is None:
        raise ValueError(f"Unrecognised Newegg price text: {price_text!r}")

    words = amount.group(0)
    true_price = float(words.replace(",", ""))
    return title, words, true_price
def main():
    """Prompt for a product URL and print the price scraped from that page.

    Only listings hosted on ``www.ebay.com`` and ``www.newegg.com`` are
    accepted. Download and parsing failures are reported on standard output
    instead of being raised or silently dropped.
    """
    url = input("Enter URL\n\n> ").strip()
    domain = urlparse(url).netloc

    # One parser per supported storefront; host names are case-insensitive.
    scrapers = {
        "www.ebay.com": ebay_scraper,
        "www.newegg.com": newegg_scraper,
    }
    scraper = scrapers.get(domain.lower())
    if scraper is None:
        print("Invalid URL")
        return

    try:
        # Without a timeout the request can block indefinitely.
        result = requests.get(url, timeout=10)
        # Treat HTTP error pages as failures rather than parsing them.
        result.raise_for_status()
    except requests.RequestException as err:
        print(f"Could not download {url}: {err}")
        return

    doc = BeautifulSoup(result.text, "html.parser")
    try:
        title, formatted, _ = scraper(url, doc)
    except Exception as err:
        # Top-level boundary: the page layout may have changed, so report
        # whatever the parser raised rather than exiting without a word.
        print(f"Could not read the price from {domain}: {err}")
        return

    print(f'Price for "{title}" on {domain} is ${formatted}')
if __name__ == '__main__':
    # Script entry point: run the prompt and leave quietly when the user
    # aborts with Ctrl+C or closes standard input while being prompted.
    try:
        main()
    except (KeyboardInterrupt, EOFError):
        print("\n\nExiting...")
        # SystemExit is raised directly because the bare exit() helper is
        # installed by the site module and is unavailable under `python -S`.
        raise SystemExit(0) from None