From da780a3780d68476bc0fca398705d5c7f604743c Mon Sep 17 00:00:00 2001 From: Crinibus <57172157+Crinibus@users.noreply.github.com> Date: Sat, 3 Oct 2020 01:45:02 +0200 Subject: [PATCH] Add ability to scrape and add product from Sharkgaming.dk #90 --- README.md | 4 +++- tech_scraping/README.md | 4 +++- tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++- tech_scraping/scraping.py | 10 ++++++++++ 4 files changed, 44 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f5ae52cc..1cf3ce66 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**

+**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk**

**The Fakta scraper can scrape discounts from this week discounts.**
@@ -87,6 +87,8 @@ There is some optional arguments you can use when running add_product.py, these - --coolshop +- --sharkgaming + When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name.
diff --git a/tech_scraping/README.md b/tech_scraping/README.md index b5f86756..b992bfba 100644 --- a/tech_scraping/README.md +++ b/tech_scraping/README.md @@ -1,4 +1,4 @@ -**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk** +**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk** # Table of contents - [First setup](#first-setup) @@ -75,4 +75,6 @@ There is some optional arguments you can use when running add_product.py, these - --coolshop +- --sharkgaming + When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name. diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py index e1f48ff9..e4670866 100644 --- a/tech_scraping/add_product.py +++ b/tech_scraping/add_product.py @@ -79,6 +79,11 @@ def argparse_setup(): 'if this is the only optional flag', action="store_true") + parser.add_argument('--sharkgaming', + help='add only sharkgaming-domain under the product-name,' + 'if this is the only optional flag', + action="store_true") + return parser.parse_args() @@ -118,6 +123,8 @@ def get_product_name(link): return change_name(html_soup.find('h1', itemprop='name').text.strip().lower()) elif URL_domain == 'www.coolshop.dk': return change_name(html_soup.find('div', class_='thing-header').text.strip().lower()) + elif URL_domain == 'www.sharkgaming.dk': + return change_name(html_soup.find('div', class_='product-name').text.strip().lower()) else: return None @@ -126,7 +133,7 @@ def check_arguments(): """Check if any of the optional domain arguments is giving to the script and returns those that are as one json-object.""" json_object = json.loads('{}') - if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop: + if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop or args.sharkgaming: if args.komplett: json_object.update({ f"{komplett_domain}": { @@ -247,6 +254,16 @@ def check_arguments(): "dates": {} } }) + if args.sharkgaming: + json_object.update({ + f"{sharkgaming_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} + } + }) else: json_object = { f"{komplett_domain}": { @@ -332,6 +349,13 @@ def check_arguments(): "url": "" }, "dates": {} + }, + f"{sharkgaming_domain}": { + "info": { + "part_num": "", + "url": "" + }, + "dates": {} } } return json_object @@ -377,6 +401,8 @@ def find_domain(domain): return 'MMVision' elif domain == 'www.coolshop.dk': return 'Coolshop' + elif domain == 'www.sharkgaming.dk': + return 'Sharkgaming' def add_to_scraper(kategori, link, url_domain): @@ -418,5 +444,6 @@ def main(kategori, link): expert_domain = 'www.expert.dk' mmvision_domain = 'www.mm-vision.dk' coolshop_domain = 'www.coolshop.dk' + sharkgaming_domain = 'www.sharkgaming.dk' args = argparse_setup() main(args.category, args.url) diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py index 59808774..a72080fe 100644 --- a/tech_scraping/scraping.py +++ b/tech_scraping/scraping.py @@ -101,6 +101,8 @@ def get_part_num(self): self.part_num = self.html_soup.find('input', type='radio')['value'] elif self.URL_domain == 'www.coolshop.dk': self.part_num = self.html_soup.find_all('div', id='attributeSku')[1].text.strip() + elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk': + self.part_num = 'Non existing on Sharkgaming' def check_part_num(self): """ @@ -179,6 +181,8 @@ def shorten_url(self): self.short_url = self.URL elif self.URL_domain == 'www.coolshop.dk': self.short_url = f'https://www.coolshop.dk/produkt/{self.URL.split("/")[-2]}/' + elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk': + self.short_url = self.URL def print_info(self): """Print info about the product in the terminal.""" @@ -315,6 +319,12 @@ def get_info(self): self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split('.')[0] +class Sharkgaming(Scraper): + def get_info(self): + self.name = self.html_soup.find('div', class_='product-name').text.strip().lower() + self.price = self.html_soup.find('span', class_='price').text.replace(' kr.', '').replace('.', '') + + if __name__ == '__main__': logger = log_setup() Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')