From aaa05f2ca274c7ee3238385ddc41825157c199ca Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 22:19:54 +0200
Subject: [PATCH 1/9] Translate function name from "ændre_æøå" to "change_æøå"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tech_scraping/add_product.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index 0b751ddc..be0a1ce1 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -101,7 +101,7 @@ def get_product_name(link):
         return None
 
 
-def ændre_æøå(navn):
+def change_æøå(navn):
     """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
     nyt_navn = ''
     for bogstav in navn:
@@ -334,9 +334,9 @@ def main(kategori, link):
         print(f'Sorry, but I can\'t scrape from this domain: {URL_domain}')
         return
 
-    # Ændre æ, ø og/eller å
-    kategori = ændre_æøå(kategori)
-    produkt_navn = ændre_æøå(produkt_navn)
+    # Change æ, ø and/or å
+    kategori = Change_æøå(kategori)
+    produkt_navn = Change_æøå(produkt_navn)
 
     save_json(kategori, produkt_navn)
     add_to_scraper(kategori, link, URL_domain)

From 3bd2ec2ee170acd7b6a048697ea15f2def2ce51a Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 22:22:45 +0200
Subject: [PATCH 2/9] Fix a small typo

---
 tech_scraping/add_product.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index be0a1ce1..e1a89e61 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -335,8 +335,8 @@ def main(kategori, link):
         return
 
     # Change æ, ø and/or å
-    kategori = Change_æøå(kategori)
-    produkt_navn = Change_æøå(produkt_navn)
+    kategori = change_æøå(kategori)
+    produkt_navn = change_æøå(produkt_navn)
 
     save_json(kategori, produkt_navn)
     add_to_scraper(kategori, link, URL_domain)

From 268ea5b5306502e646144076a4d6b8dc074f3e6a Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:14:52 +0200
Subject: [PATCH 3/9] Translate variable names in function "change_æøå"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tech_scraping/add_product.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index e1a89e61..c6b85ee7 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -101,19 +101,19 @@ def get_product_name(link):
         return None
 
 
-def change_æøå(navn):
+def change_æøå(name):
     """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
-    nyt_navn = ''
-    for bogstav in navn:
-        if bogstav in 'æøå':
-            if bogstav == 'æ':
-                bogstav = 'ae'
-            elif bogstav == 'ø':
-                bogstav = 'oe'
-            elif bogstav == 'å':
-                bogstav = 'aa'
-        nyt_navn += bogstav
-    return nyt_navn
+    new_name = ''
+    for letter in name:
+        if letter in 'æøå':
+            if letter == 'æ':
+                letter = 'ae'
+            elif letter == 'ø':
+                letter = 'oe'
+            elif letter == 'å':
+                letter = 'aa'
+        new_name += letter
+    return new_name
 
 
 def check_arguments():
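A note on the final shape of change_æøå after patch 3: the per-letter if/elif chain works, but Python's built-in str.translate covers the same mapping in a single pass. A minimal sketch of that alternative — this is an editorial illustration, not code from the patches:

```python
# Hypothetical alternative to the patched change_æøå(); illustrative only.
# str.maketrans accepts a {char: replacement} dict, and str.translate
# applies the whole mapping in one pass over the string.
ÆØÅ_TABLE = str.maketrans({'æ': 'ae', 'ø': 'oe', 'å': 'aa'})

def change_æøå(name: str) -> str:
    """Replace æ, ø and å with ae, oe and aa, as the patched loop does."""
    return name.translate(ÆØÅ_TABLE)

assert change_æøå('blå skærm') == 'blaa skaerm'
```
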
From fc51e59546699bc6b986fcc9ed8873068d49b544 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:17:19 +0200
Subject: [PATCH 4/9] Move function "change_æøå" from "add_product.py" to
 "scraping.py"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- "add_product.py" now imports the function "change_æøå" from "scraping.py"
---
 tech_scraping/add_product.py | 17 +----------------
 tech_scraping/scraping.py    | 15 +++++++++++++++
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index c6b85ee7..98b49ca3 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -3,7 +3,7 @@
 import requests
 from bs4 import BeautifulSoup
 import json
-from scraping import change_name
+from scraping import change_name, change_æøå
 import argparse
 
 
@@ -101,21 +101,6 @@ def get_product_name(link):
         return None
 
 
-def change_æøå(name):
-    """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
-    new_name = ''
-    for letter in name:
-        if letter in 'æøå':
-            if letter == 'æ':
-                letter = 'ae'
-            elif letter == 'ø':
-                letter = 'oe'
-            elif letter == 'å':
-                letter = 'aa'
-        new_name += letter
-    return new_name
-
-
 def check_arguments():
     """Check if any of the optional domain arguments is giving to the script
        and returns those that are as one json-object."""
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index 22cf632e..7af52d37 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -203,6 +203,21 @@ def change_name(name):
     return name
 
 
+def change_æøå(name):
+    """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
+    new_name = ''
+    for letter in name:
+        if letter in 'æøå':
+            if letter == 'æ':
+                letter = 'ae'
+            elif letter == 'ø':
+                letter = 'oe'
+            elif letter == 'å':
+                letter = 'aa'
+        new_name += letter
+    return new_name
+
+
 class Komplett(Scraper):
     def get_info(self):
         self.name = self.html_soup.find('div', class_='product-main-info__info').h1.span.text.lower()

From 96d70b3f5cd4f234d58c4e9d9586ba470a688c17 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:26:56 +0200
Subject: [PATCH 5/9] Add function call to "change_æøå" in the same line as
 function "change_name"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tech_scraping/scraping.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index 7af52d37..f052ce74 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -44,7 +44,7 @@ def __init__(self, category, URL):
         except Exception as err:
             logger.error(f'Failed in method "{self.__class__.__name__}.get_info()": {err}', exc_info=True)
 
-        self.name = change_name(self.name)
+        self.name = change_æøå(change_name(self.name))
         self.date = str(datetime.today().strftime('%Y-%m-%d'))
         self.get_part_num()
         self.shorten_url()
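After patch 5, every scraped product name flows through two normalizers: change_name() first, then change_æøå(). A rough sketch of that pipeline, with change_name() stubbed out since its body is not shown in this series:

```python
# Sketch of the name pipeline created by patch 5. change_name() is a
# stand-in stub here, NOT the real implementation from scraping.py.
def change_name(name: str) -> str:
    return name  # the real function rewrites certain product names

def change_æøå(name: str) -> str:
    return name.translate(str.maketrans({'æ': 'ae', 'ø': 'oe', 'å': 'aa'}))

# Equivalent to the patched line in Scraper.__init__:
#   self.name = change_æøå(change_name(self.name))
print(change_æøå(change_name('blå usb-kabel')))  # -> blaa usb-kabel
```
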
From 243333572a836455f3742af4303e6b1eee833c55 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:39:47 +0200
Subject: [PATCH 6/9] Add ability to scrape and add product from Expert.dk #86

---
 README.md                    |  4 +++-
 tech_scraping/README.md      |  4 +++-
 tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
 tech_scraping/scraping.py    | 10 ++++++++++
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index a27e7074..66493b9c 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com and Power.dk**
+**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk and Expert.dk**
 
 **The Fakta scraper can scrape discounts from this week discounts.**
 
@@ -81,6 +81,8 @@ There is some optional arguments you can use when running add_product.py, these are:
 
 - --power
 
+- --expert
+
 When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name.
diff --git a/tech_scraping/README.md b/tech_scraping/README.md
index b7b85503..781874a5 100644
--- a/tech_scraping/README.md
+++ b/tech_scraping/README.md
@@ -1,4 +1,4 @@
-**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com and Power.dk**
+**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk**
 
 # Table of contents
 - [First setup](#first-setup)
@@ -69,4 +69,6 @@ There is some optional arguments you can use when running add_product.py, these are:
 
 - --power
 
+- --expert
+
 When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name.
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index 98b49ca3..f1590f7e 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -64,6 +64,11 @@ def argparse_setup():
                              'if this is the only optional flag',
                         action="store_true")
 
+    parser.add_argument('--expert',
+                        help='add only expert-domain under the product-name,'
+                             'if this is the only optional flag',
+                        action="store_true")
+
     return parser.parse_args()
 
 
@@ -97,6 +102,8 @@ def get_product_name(link):
         return change_name(html_soup.find('h1', class_='product-title').text.lower())
     elif URL_domain == 'www.power.dk':
         return change_name(html_soup.find('title').text.replace(' - Power.dk', '').lower())
+    elif URL_domain == 'www.expert.dk':
+        return change_name(html_soup.find('meta', property='og:title')['content'].lower())
     else:
         return None
 
@@ -105,7 +112,7 @@ def check_arguments():
     """Check if any of the optional domain arguments is giving to the script
        and returns those that are as one json-object."""
     json_object = json.loads('{}')
-    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power:
+    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert:
         if args.komplett:
             json_object.update({
                 f"{komplett_domain}": {
@@ -196,6 +203,16 @@ def check_arguments():
                     "dates": {}
                 }
             })
+        if args.expert:
+            json_object.update({
+                f"{expert_domain}": {
+                    "info": {
+                        "part_num": "",
+                        "url": ""
+                    },
+                    "dates": {}
+                }
+            })
     else:
         json_object = {
             f"{komplett_domain}": {
@@ -260,6 +277,13 @@ def check_arguments():
                     "url": ""
                 },
                 "dates": {}
+            },
+            f"{expert_domain}": {
+                "info": {
+                    "part_num": "",
+                    "url": ""
+                },
+                "dates": {}
             }
         }
     return json_object
@@ -299,6 +323,8 @@ def find_domain(domain):
         return 'eBay'
     elif domain == 'www.power.dk':
         return 'Power'
+    elif domain == 'www.expert.dk':
+        return 'Expert'
 
 
 def add_to_scraper(kategori, link, url_domain):
@@ -337,5 +363,6 @@ def main(kategori, link):
     amazon_domain = 'www.amazon.com'
     ebay_domain = 'www.ebay.com'
     power_domain = 'www.power.dk'
+    expert_domain = 'www.expert.dk'
     args = argparse_setup()
     main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index f052ce74..c8394828 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -95,6 +95,8 @@ def get_part_num(self):
             self.part_num = self.URL.split('=')[1]
         elif self.URL_domain == 'www.power.dk':
             self.part_num = self.URL.split('/')[-2].replace('p-', '')
+        elif self.URL_domain == 'www.expert.dk':
+            self.part_num = self.URL.split('/')[-2].replace('p-', '')
 
     def check_part_num(self):
         """
@@ -167,6 +169,8 @@ def shorten_url(self):
             self.short_url = self.URL.split('?')[0]
         elif self.URL_domain == 'www.power.dk':
             self.short_url = f'https://www.power.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
+        elif self.URL_domain == 'www.expert.dk':
+            self.short_url = f'https://www.expert.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
 
     def print_info(self):
         """Print info about the product in the terminal."""
@@ -285,6 +289,12 @@ def get_info(self):
         self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0]
 
 
+class Expert(Scraper):
+    def get_info(self):
+        self.name = self.html_soup.find('meta', property='og:title')['content'].lower()
+        self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0]
+
+
 if __name__ == '__main__':
     logger = log_setup()
     Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
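The Expert.dk scraper in patch 6 reads Open Graph / product meta tags instead of visible page elements. A self-contained illustration of that technique on canned HTML — the markup below is fabricated for the demo, not captured from Expert.dk:

```python
# Demo of the meta-tag extraction the Expert scraper relies on.
# The HTML snippet is made up for illustration purposes.
from bs4 import BeautifulSoup

html = '''
<html><head>
  <meta property="og:title" content="ASUS GeForce RTX 3070 ROG STRIX">
  <meta property="product:price:amount" content="4999,00">
</head><body></body></html>
'''

soup = BeautifulSoup(html, 'html.parser')
name = soup.find('meta', property='og:title')['content'].lower()
# Danish prices use a decimal comma; splitting on ',' drops the øre part.
price = soup.find('meta', property='product:price:amount')['content'].split(',')[0]
print(name, price)  # -> asus geforce rtx 3070 rog strix 4999
```
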
From 16718c0022e4a38d446385315c05137a77830955 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 3 Oct 2020 00:05:28 +0200
Subject: [PATCH 7/9] Add ability to scrape and add product from mm-vision.dk
 #88

---
 tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
 tech_scraping/scraping.py    | 10 ++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index f1590f7e..7f427912 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -69,6 +69,11 @@ def argparse_setup():
                              'if this is the only optional flag',
                         action="store_true")
 
+    parser.add_argument('--mmvision',
+                        help='add only mm-vision-domain under the product-name,'
+                             'if this is the only optional flag',
+                        action="store_true")
+
     return parser.parse_args()
 
 
@@ -104,6 +109,8 @@ def get_product_name(link):
         return change_name(html_soup.find('title').text.replace(' - Power.dk', '').lower())
     elif URL_domain == 'www.expert.dk':
         return change_name(html_soup.find('meta', property='og:title')['content'].lower())
+    elif URL_domain == 'www.mm-vision.dk':
+        return change_name(html_soup.find('h1', itemprop='name').text.strip().lower())
     else:
         return None
 
@@ -112,7 +119,7 @@ def check_arguments():
     """Check if any of the optional domain arguments is giving to the script
        and returns those that are as one json-object."""
     json_object = json.loads('{}')
-    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert:
+    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision:
         if args.komplett:
             json_object.update({
                 f"{komplett_domain}": {
@@ -213,6 +220,16 @@ def check_arguments():
                     "dates": {}
                 }
             })
+        if args.mmvision:
+            json_object.update({
+                f"{mmvision_domain}": {
+                    "info": {
+                        "part_num": "",
+                        "url": ""
+                    },
+                    "dates": {}
+                }
+            })
     else:
         json_object = {
             f"{komplett_domain}": {
@@ -284,6 +301,13 @@ def check_arguments():
                     "url": ""
                 },
                 "dates": {}
+            },
+            f"{mmvision_domain}": {
+                "info": {
+                    "part_num": "",
+                    "url": ""
+                },
+                "dates": {}
             }
         }
     return json_object
@@ -325,6 +349,8 @@ def find_domain(domain):
         return 'Power'
     elif domain == 'www.expert.dk':
         return 'Expert'
+    elif domain == 'www.mm-vision.dk':
+        return 'MMVision'
 
 
 def add_to_scraper(kategori, link, url_domain):
@@ -364,5 +390,6 @@ def main(kategori, link):
     ebay_domain = 'www.ebay.com'
     power_domain = 'www.power.dk'
     expert_domain = 'www.expert.dk'
+    mmvision_domain = 'www.mm-vision.dk'
     args = argparse_setup()
     main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index c8394828..ae2d2719 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -97,6 +97,8 @@ def get_part_num(self):
             self.part_num = self.URL.split('/')[-2].replace('p-', '')
         elif self.URL_domain == 'www.expert.dk':
             self.part_num = self.URL.split('/')[-2].replace('p-', '')
+        elif self.URL_domain == 'www.mm-vision.dk':
+            self.part_num = self.html_soup.find('input', type='radio')['value']
 
     def check_part_num(self):
         """
@@ -171,6 +173,8 @@ def shorten_url(self):
             self.short_url = f'https://www.power.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
         elif self.URL_domain == 'www.expert.dk':
             self.short_url = f'https://www.expert.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
+        elif self.URL_domain == 'www.mm-vision.dk':
+            self.short_url = self.URL
 
     def print_info(self):
         """Print info about the product in the terminal."""
@@ -295,6 +299,12 @@ def get_info(self):
         self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0]
 
 
+class MMVision(Scraper):
+    def get_info(self):
+        self.name = self.html_soup.find('h1', itemprop='name').text.strip().lower()
+        self.price = self.html_soup.find('h3', class_='product-price text-right').text.replace(',-', '').replace('.', '')
+
+
 if __name__ == '__main__':
     logger = log_setup()
     Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
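Each shop class parses its price out of a differently formatted string: Expert's "4999,00" (decimal comma) in patch 6, mm-vision's "4.999,-" here. A hedged sketch of one helper that would cover these two formats plus Sharkgaming's " kr." suffix from patch 9 — the function name and cases are the editor's, not the repo's, and Coolshop's dot-decimal format would need its own branch:

```python
# Hypothetical consolidation of the per-shop price cleanup; illustrative only.
def parse_danish_price(raw: str) -> int:
    """Turn strings like '4.999,-', '4999,00' or '4999 kr.' into 4999."""
    cleaned = raw.replace(' kr.', '').replace(',-', '')
    cleaned = cleaned.split(',')[0]     # drop the øre part after a decimal comma
    cleaned = cleaned.replace('.', '')  # drop thousands separators
    return int(cleaned)

assert parse_danish_price('4.999,-') == 4999
assert parse_danish_price('4999,00') == 4999
assert parse_danish_price('4999 kr.') == 4999
```
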
From 8ea535186eed6f2e5ab89c5884f23f7b4d6c6681 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 3 Oct 2020 00:35:41 +0200
Subject: [PATCH 8/9] Add ability to scrape and add product from Coolshop.dk
 #89

- also add the missing mm-vision entry to the README files
---
 README.md                    |  6 +++++-
 tech_scraping/README.md      |  6 +++++-
 tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
 tech_scraping/scraping.py    | 10 ++++++++++
 4 files changed, 48 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 66493b9c..f5ae52cc 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk and Expert.dk**
+**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
 
 **The Fakta scraper can scrape discounts from this week discounts.**
 
@@ -83,6 +83,10 @@ There is some optional arguments you can use when running add_product.py, these are:
 
 - --expert
 
+- --mmvision
+
+- --coolshop
+
 When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name.
diff --git a/tech_scraping/README.md b/tech_scraping/README.md
index 781874a5..b5f86756 100644
--- a/tech_scraping/README.md
+++ b/tech_scraping/README.md
@@ -1,4 +1,4 @@
-**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk**
+**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
 
 # Table of contents
 - [First setup](#first-setup)
@@ -71,4 +71,8 @@ There is some optional arguments you can use when running add_product.py, these are:
 
 - --expert
 
+- --mmvision
+
+- --coolshop
+
 When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name.
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index 7f427912..e1f48ff9 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -74,6 +74,11 @@ def argparse_setup():
                              'if this is the only optional flag',
                         action="store_true")
 
+    parser.add_argument('--coolshop',
+                        help='add only coolshop-domain under the product-name,'
+                             'if this is the only optional flag',
+                        action="store_true")
+
     return parser.parse_args()
 
 
@@ -111,6 +116,8 @@ def get_product_name(link):
         return change_name(html_soup.find('meta', property='og:title')['content'].lower())
     elif URL_domain == 'www.mm-vision.dk':
         return change_name(html_soup.find('h1', itemprop='name').text.strip().lower())
+    elif URL_domain == 'www.coolshop.dk':
+        return change_name(html_soup.find('div', class_='thing-header').text.strip().lower())
     else:
         return None
 
@@ -119,7 +126,7 @@ def check_arguments():
     """Check if any of the optional domain arguments is giving to the script
        and returns those that are as one json-object."""
     json_object = json.loads('{}')
-    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision:
+    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop:
         if args.komplett:
             json_object.update({
                 f"{komplett_domain}": {
@@ -230,6 +237,16 @@ def check_arguments():
                     "dates": {}
                 }
             })
+        if args.coolshop:
+            json_object.update({
+                f"{coolshop_domain}": {
+                    "info": {
+                        "part_num": "",
+                        "url": ""
+                    },
+                    "dates": {}
+                }
+            })
     else:
         json_object = {
             f"{komplett_domain}": {
@@ -308,6 +325,13 @@ def check_arguments():
                     "url": ""
                 },
                 "dates": {}
+            },
+            f"{coolshop_domain}": {
+                "info": {
+                    "part_num": "",
+                    "url": ""
+                },
+                "dates": {}
             }
         }
     return json_object
@@ -351,6 +375,8 @@ def find_domain(domain):
         return 'Expert'
     elif domain == 'www.mm-vision.dk':
         return 'MMVision'
+    elif domain == 'www.coolshop.dk':
+        return 'Coolshop'
 
 
 def add_to_scraper(kategori, link, url_domain):
@@ -391,5 +417,6 @@ def main(kategori, link):
     power_domain = 'www.power.dk'
     expert_domain = 'www.expert.dk'
     mmvision_domain = 'www.mm-vision.dk'
+    coolshop_domain = 'www.coolshop.dk'
     args = argparse_setup()
     main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index ae2d2719..59808774 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -99,6 +99,8 @@ def get_part_num(self):
             self.part_num = self.URL.split('/')[-2].replace('p-', '')
         elif self.URL_domain == 'www.mm-vision.dk':
             self.part_num = self.html_soup.find('input', type='radio')['value']
+        elif self.URL_domain == 'www.coolshop.dk':
+            self.part_num = self.html_soup.find_all('div', id='attributeSku')[1].text.strip()
 
     def check_part_num(self):
         """
@@ -175,6 +177,8 @@ def shorten_url(self):
             self.short_url = f'https://www.expert.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
         elif self.URL_domain == 'www.mm-vision.dk':
             self.short_url = self.URL
+        elif self.URL_domain == 'www.coolshop.dk':
+            self.short_url = f'https://www.coolshop.dk/produkt/{self.URL.split("/")[-2]}/'
 
     def print_info(self):
         """Print info about the product in the terminal."""
@@ -305,6 +309,12 @@ def get_info(self):
         self.price = self.html_soup.find('h3', class_='product-price text-right').text.replace(',-', '').replace('.', '')
 
 
+class Coolshop(Scraper):
+    def get_info(self):
+        self.name = self.html_soup.find('div', class_='thing-header').text.strip().lower()
+        self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split('.')[0]
+
+
 if __name__ == '__main__':
     logger = log_setup()
     Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
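By patch 8, check_arguments() grows by roughly twenty copy-pasted lines per shop. Purely as an editor's sketch (the names are illustrative and this is not code from the repo), the same behavior can be table-driven so that a new shop costs one dictionary entry:

```python
# Editor's sketch of a table-driven check_arguments(); illustrative only.
DOMAINS = {
    'komplett': 'www.komplett.dk',
    'expert': 'www.expert.dk',
    'mmvision': 'www.mm-vision.dk',
    'coolshop': 'www.coolshop.dk',
    # ... one entry per supported shop
}

def empty_record():
    return {"info": {"part_num": "", "url": ""}, "dates": {}}

def check_arguments(args):
    """Build the records.json skeleton for the chosen (or all) domains."""
    chosen = [d for flag, d in DOMAINS.items() if getattr(args, flag, False)]
    return {domain: empty_record() for domain in (chosen or DOMAINS.values())}
```
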
From da780a3780d68476bc0fca398705d5c7f604743c Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 3 Oct 2020 01:45:02 +0200
Subject: [PATCH 9/9] Add ability to scrape and add product from
 Sharkgaming.dk #90

---
 README.md                    |  4 +++-
 tech_scraping/README.md      |  4 +++-
 tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
 tech_scraping/scraping.py    | 10 ++++++++++
 4 files changed, 44 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index f5ae52cc..1cf3ce66 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
+**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk**
 
 **The Fakta scraper can scrape discounts from this week discounts.**
 
@@ -87,6 +87,8 @@ There is some optional arguments you can use when running add_product.py, these are:
 
 - --coolshop
 
+- --sharkgaming
+
 When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name.
diff --git a/tech_scraping/README.md b/tech_scraping/README.md
index b5f86756..b992bfba 100644
--- a/tech_scraping/README.md
+++ b/tech_scraping/README.md
@@ -1,4 +1,4 @@
-**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
+**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk**
 
 # Table of contents
 - [First setup](#first-setup)
@@ -75,4 +75,6 @@ There is some optional arguments you can use when running add_product.py, these are:
 
 - --coolshop
 
+- --sharkgaming
+
 When using one or more of "domain" arguments, only the chosen domains gets added to records.json under the product name.
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index e1f48ff9..e4670866 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -79,6 +79,11 @@ def argparse_setup():
                              'if this is the only optional flag',
                         action="store_true")
 
+    parser.add_argument('--sharkgaming',
+                        help='add only sharkgaming-domain under the product-name,'
+                             'if this is the only optional flag',
+                        action="store_true")
+
     return parser.parse_args()
 
 
@@ -118,6 +123,8 @@ def get_product_name(link):
         return change_name(html_soup.find('h1', itemprop='name').text.strip().lower())
     elif URL_domain == 'www.coolshop.dk':
         return change_name(html_soup.find('div', class_='thing-header').text.strip().lower())
+    elif URL_domain == 'www.sharkgaming.dk':
+        return change_name(html_soup.find('div', class_='product-name').text.strip().lower())
     else:
         return None
 
@@ -126,7 +133,7 @@ def check_arguments():
     """Check if any of the optional domain arguments is giving to the script
        and returns those that are as one json-object."""
     json_object = json.loads('{}')
-    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop:
+    if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop or args.sharkgaming:
         if args.komplett:
             json_object.update({
                 f"{komplett_domain}": {
@@ -247,6 +254,16 @@ def check_arguments():
                     "dates": {}
                 }
             })
+        if args.sharkgaming:
+            json_object.update({
+                f"{sharkgaming_domain}": {
+                    "info": {
+                        "part_num": "",
+                        "url": ""
+                    },
+                    "dates": {}
+                }
+            })
     else:
         json_object = {
             f"{komplett_domain}": {
@@ -332,6 +349,13 @@ def check_arguments():
                     "url": ""
                 },
                 "dates": {}
+            },
+            f"{sharkgaming_domain}": {
+                "info": {
+                    "part_num": "",
+                    "url": ""
+                },
+                "dates": {}
             }
         }
     return json_object
@@ -377,6 +401,8 @@ def find_domain(domain):
         return 'MMVision'
     elif domain == 'www.coolshop.dk':
         return 'Coolshop'
+    elif domain == 'www.sharkgaming.dk':
+        return 'Sharkgaming'
 
 
 def add_to_scraper(kategori, link, url_domain):
@@ -418,5 +444,6 @@ def main(kategori, link):
     expert_domain = 'www.expert.dk'
     mmvision_domain = 'www.mm-vision.dk'
     coolshop_domain = 'www.coolshop.dk'
+    sharkgaming_domain = 'www.sharkgaming.dk'
     args = argparse_setup()
     main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index 59808774..a72080fe 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -101,6 +101,8 @@ def get_part_num(self):
             self.part_num = self.html_soup.find('input', type='radio')['value']
         elif self.URL_domain == 'www.coolshop.dk':
             self.part_num = self.html_soup.find_all('div', id='attributeSku')[1].text.strip()
+        elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk':
+            self.part_num = 'Non existing on Sharkgaming'
 
     def check_part_num(self):
         """
@@ -179,6 +181,8 @@ def shorten_url(self):
             self.short_url = self.URL
         elif self.URL_domain == 'www.coolshop.dk':
             self.short_url = f'https://www.coolshop.dk/produkt/{self.URL.split("/")[-2]}/'
+        elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk':
+            self.short_url = self.URL
 
     def print_info(self):
         """Print info about the product in the terminal."""
@@ -315,6 +319,12 @@ def get_info(self):
         self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split('.')[0]
 
 
+class Sharkgaming(Scraper):
+    def get_info(self):
+        self.name = self.html_soup.find('div', class_='product-name').text.strip().lower()
+        self.price = self.html_soup.find('span', class_='price').text.replace(' kr.', '').replace('.', '')
+
+
 if __name__ == '__main__':
     logger = log_setup()
     Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
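Closing observation on the series as a whole: every new shop touches the same four elif chains (get_product_name, get_part_num, shorten_url, find_domain) plus one Scraper subclass. A hedged sketch of a registry that would collapse that dispatch to a single lookup — the class and helper names are the editor's, mirroring but not copied from scraping.py:

```python
# Editor's sketch: registering scraper classes by domain; illustrative only.
SCRAPERS = {}

def register(domain):
    """Class decorator that maps a domain to its Scraper subclass."""
    def wrap(cls):
        SCRAPERS[domain] = cls
        return cls
    return wrap

@register('www.sharkgaming.dk')
class Sharkgaming:
    pass  # get_info(), get_part_num(), shorten_url() would live here

def scraper_for(domain):
    # Replaces the growing elif chain in find_domain()
    return SCRAPERS[domain]

print(scraper_for('www.sharkgaming.dk').__name__)  # -> Sharkgaming
```
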