diff --git a/README.md b/README.md index a27e7074..1cf3ce66 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com and Power.dk**<br/><br/> +**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk**<br/><br/> **The Fakta scraper can scrape discounts from this week discounts.** <br/> @@ -81,6 +81,14 @@ There is some optional arguments you can use when running add_product.py, these - --power +- --expert + +- --mmvision + +- --coolshop + +- --sharkgaming + When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name. <br/> diff --git a/tech_scraping/README.md b/tech_scraping/README.md index b7b85503..b992bfba 100644 --- a/tech_scraping/README.md +++ b/tech_scraping/README.md @@ -1,4 +1,4 @@ -**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com and Power.dk** +**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk** # Table of contents - [First setup](#first-setup) @@ -69,4 +69,12 @@ There is some optional arguments you can use when running add_product.py, these - --power +- --expert + +- --mmvision + +- --coolshop + +- --sharkgaming + When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name. diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py index 0b751ddc..e4670866 100644 --- a/tech_scraping/add_product.py +++ b/tech_scraping/add_product.py @@ -3,7 +3,7 @@ import requests from bs4 import BeautifulSoup import json -from scraping import change_name +from scraping import change_name, change_æøå import argparse @@ -64,6 +64,26 @@ def argparse_setup(): 'if this is the only optional flag', action="store_true") + parser.add_argument('--expert', + help='add only expert-domain under the product-name,' + 'if this is the only optional flag', + action="store_true") + + parser.add_argument('--mmvision', + help='add only mm-vision-domain under the product-name,' + 'if this is the only optional flag', + action="store_true") + + parser.add_argument('--coolshop', + help='add only coolshop-domain under the product-name,' + 'if this is the only optional flag', + action="store_true") + + parser.add_argument('--sharkgaming', + help='add only sharkgaming-domain under the product-name,' + 'if this is the only optional flag', + action="store_true") + return parser.parse_args() @@ -97,30 +117,23 @@ def get_product_name(link): return change_name(html_soup.find('h1', class_='product-title').text.lower()) elif URL_domain == 'www.power.dk': return change_name(html_soup.find('title').text.replace(' - Power.dk', '').lower()) + elif URL_domain == 'www.expert.dk': + return change_name(html_soup.find('meta', property='og:title')['content'].lower()) + elif URL_domain == 'www.mm-vision.dk': + return change_name(html_soup.find('h1', itemprop='name').text.strip().lower()) + elif URL_domain == 'www.coolshop.dk': + return change_name(html_soup.find('div', class_='thing-header').text.strip().lower()) + elif URL_domain == 'www.sharkgaming.dk': + return change_name(html_soup.find('div', class_='product-name').text.strip().lower()) else: return None -def ændre_æøå(navn): - """Change the letters æ, ø and å to international letters to avoid unicode and return the new name.""" - nyt_navn = '' - for bogstav in navn: - if bogstav in 'æøå': - if bogstav == 'æ': - bogstav = 'ae' - elif bogstav == 'ø': - bogstav = 'oe' - elif bogstav == 'å': - bogstav = 'aa' - nyt_navn += bogstav - return nyt_navn - - def check_arguments(): """Check if any of the optional domain arguments is giving to the script and returns those that are as one json-object.""" json_object = json.loads('{}') - if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power: + if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop or args.sharkgaming: if args.komplett: json_object.update({ f"{komplett_domain}": { @@ -211,6 +224,46 @@ def check_arguments(): "dates": {} } }) + if args.expert: + json_object.update({ + f"{expert_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + } + }) + if args.mmvision: + json_object.update({ + f"{mmvision_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + } + }) + if args.coolshop: + json_object.update({ + f"{coolshop_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + } + }) + if args.sharkgaming: + json_object.update({ + f"{sharkgaming_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + } + }) else: json_object = { f"{komplett_domain}": { @@ -275,6 +328,34 @@ def check_arguments(): "url": "" }, "dates": {} + }, + f"{expert_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + }, + f"{mmvision_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + }, + f"{coolshop_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + }, + f"{sharkgaming_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} } } return json_object @@ -314,6 +395,14 @@ def find_domain(domain): return 'eBay' elif domain == 'www.power.dk': return 'Power' + elif domain == 'www.expert.dk': + return 'Expert' + elif domain == 'www.mm-vision.dk': + return 'MMVision' + elif domain == 'www.coolshop.dk': + return 'Coolshop' + elif domain == 'www.sharkgaming.dk': + return 'Sharkgaming' def add_to_scraper(kategori, link, url_domain): @@ -334,9 +423,9 @@ def main(kategori, link): print(f'Sorry, but I can\'t scrape from this domain: {URL_domain}') return - # Ændre æ, ø og/eller å - kategori = ændre_æøå(kategori) - produkt_navn = ændre_æøå(produkt_navn) + # Change æ, ø and/or å + kategori = change_æøå(kategori) + produkt_navn = change_æøå(produkt_navn) save_json(kategori, produkt_navn) add_to_scraper(kategori, link, URL_domain) @@ -352,5 +441,9 @@ def main(kategori, link): amazon_domain = 'www.amazon.com' ebay_domain = 'www.ebay.com' power_domain = 'www.power.dk' + expert_domain = 'www.expert.dk' + mmvision_domain = 'www.mm-vision.dk' + coolshop_domain = 'www.coolshop.dk' + sharkgaming_domain = 'www.sharkgaming.dk' args = argparse_setup() main(args.category, args.url) diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py index 22cf632e..a72080fe 100644 --- a/tech_scraping/scraping.py +++ b/tech_scraping/scraping.py @@ -44,7 +44,7 @@ def __init__(self, category, URL): except Exception as err: logger.error(f'Failed in method "{self.__class__.__name__}.get_info()": {err}', exc_info=True) - self.name = change_name(self.name) + self.name = change_æøå(change_name(self.name)) self.date = str(datetime.today().strftime('%Y-%m-%d')) self.get_part_num() self.shorten_url() @@ -95,6 +95,14 @@ def get_part_num(self): self.part_num = self.URL.split('=')[1] elif self.URL_domain == 'www.power.dk': self.part_num = self.URL.split('/')[-2].replace('p-', '') + elif self.URL_domain == 'www.expert.dk': + self.part_num = self.URL.split('/')[-2].replace('p-', '') + elif self.URL_domain == 'www.mm-vision.dk': + self.part_num = self.html_soup.find('input', type='radio')['value'] + elif self.URL_domain == 'www.coolshop.dk': + self.part_num = self.html_soup.find_all('div', id='attributeSku')[1].text.strip() + elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk': + self.part_num = 'Non existing on Sharkgaming' def check_part_num(self): """ @@ -167,6 +175,14 @@ def shorten_url(self): self.short_url = self.URL.split('?')[0] elif self.URL_domain == 'www.power.dk': self.short_url = f'https://www.power.dk/{self.URL.split("/")[3]}/p-{self.part_num}' + elif self.URL_domain == 'www.expert.dk': + self.short_url = f'https://www.expert.dk/{self.URL.split("/")[3]}/p-{self.part_num}' + elif self.URL_domain == 'www.mm-vision.dk': + self.short_url = self.URL + elif self.URL_domain == 'www.coolshop.dk': + self.short_url = f'https://www.coolshop.dk/produkt/{self.URL.split("/")[-2]}/' + elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk': + self.short_url = self.URL def print_info(self): """Print info about the product in the terminal.""" @@ -203,6 +219,21 @@ def change_name(name): return name +def change_æøå(name): + """Change the letters æ, ø and å to international letters to avoid unicode and return the new name.""" + new_name = '' + for letter in name: + if letter in 'æøå': + if letter == 'æ': + letter = 'ae' + elif letter == 'ø': + letter = 'oe' + elif letter == 'å': + letter = 'aa' + new_name += letter + return new_name + + class Komplett(Scraper): def get_info(self): self.name = self.html_soup.find('div', class_='product-main-info__info').h1.span.text.lower() @@ -270,6 +301,30 @@ def get_info(self): self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0] +class Expert(Scraper): + def get_info(self): + self.name = self.html_soup.find('meta', property='og:title')['content'].lower() + self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0] + + +class MMVision(Scraper): + def get_info(self): + self.name = self.html_soup.find('h1', itemprop='name').text.strip().lower() + self.price = self.html_soup.find('h3', class_='product-price text-right').text.replace(',-', '').replace('.', '') + + +class Coolshop(Scraper): + def get_info(self): + self.name = self.html_soup.find('div', class_='thing-header').text.strip().lower() + self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split('.')[0] + + +class Sharkgaming(Scraper): + def get_info(self): + self.name = self.html_soup.find('div', class_='product-name').text.strip().lower() + self.price = self.html_soup.find('span', class_='price').text.replace(' kr.', '').replace('.', '') + + if __name__ == '__main__': logger = log_setup() Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')