From aaa05f2ca274c7ee3238385ddc41825157c199ca Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 22:19:54 +0200
Subject: [PATCH 1/9] Translate function name from "ændre_æøå" to "change_æøå"
---
tech_scraping/add_product.py | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index 0b751ddc..be0a1ce1 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -101,7 +101,7 @@ def get_product_name(link):
return None
-def ændre_æøå(navn):
+def change_æøå(navn):
"""Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
nyt_navn = ''
for bogstav in navn:
@@ -334,9 +334,9 @@ def main(kategori, link):
print(f'Sorry, but I can\'t scrape from this domain: {URL_domain}')
return
- # Ændre æ, ø og/eller å
- kategori = ændre_æøå(kategori)
- produkt_navn = ændre_æøå(produkt_navn)
+ # Change æ, ø and/or å
+ kategori = Change_æøå(kategori)
+ produkt_navn = Change_æøå(produkt_navn)
save_json(kategori, produkt_navn)
add_to_scraper(kategori, link, URL_domain)
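
For reference, a standalone sketch of the renamed helper (its body is shown in full in patch 3 below), with an invented input to show the æ/ø/å mapping:

# navn = "name", bogstav = "letter"; these identifiers are translated in patch 3.
def change_æøå(navn):
    """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
    nyt_navn = ''
    for bogstav in navn:
        if bogstav in 'æøå':
            if bogstav == 'æ':
                bogstav = 'ae'
            elif bogstav == 'ø':
                bogstav = 'oe'
            elif bogstav == 'å':
                bogstav = 'aa'
        nyt_navn += bogstav
    return nyt_navn

print(change_æøå('bærbar skærm på tilbud'))  # -> 'baerbar skaerm paa tilbud'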
From 3bd2ec2ee170acd7b6a048697ea15f2def2ce51a Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 22:22:45 +0200
Subject: [PATCH 2/9] Fix a small typo
---
tech_scraping/add_product.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index be0a1ce1..e1a89e61 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -335,8 +335,8 @@ def main(kategori, link):
return
# Change æ, ø and/or å
- kategori = Change_æøå(kategori)
- produkt_navn = Change_æøå(produkt_navn)
+ kategori = change_æøå(kategori)
+ produkt_navn = change_æøå(produkt_navn)
save_json(kategori, produkt_navn)
add_to_scraper(kategori, link, URL_domain)
From 268ea5b5306502e646144076a4d6b8dc074f3e6a Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:14:52 +0200
Subject: [PATCH 3/9] Translate variable names in function "change_æøå"
---
tech_scraping/add_product.py | 24 ++++++++++++------------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index e1a89e61..c6b85ee7 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -101,19 +101,19 @@ def get_product_name(link):
return None
-def change_æøå(navn):
+def change_æøå(name):
"""Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
- nyt_navn = ''
- for bogstav in navn:
- if bogstav in 'æøå':
- if bogstav == 'æ':
- bogstav = 'ae'
- elif bogstav == 'ø':
- bogstav = 'oe'
- elif bogstav == 'å':
- bogstav = 'aa'
- nyt_navn += bogstav
- return nyt_navn
+ new_name = ''
+ for letter in name:
+ if letter in 'æøå':
+ if letter == 'æ':
+ letter = 'ae'
+ elif letter == 'ø':
+ letter = 'oe'
+ elif letter == 'å':
+ letter = 'aa'
+ new_name += letter
+ return new_name
def check_arguments():
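
With the identifiers now in English, the same mapping could also be written with str.translate; a behavior-equivalent sketch, not part of this series:

# Equivalent sketch using str.translate (not in the patches).
AEOA_TABLE = str.maketrans({'æ': 'ae', 'ø': 'oe', 'å': 'aa'})

def change_æøå(name):
    """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
    return name.translate(AEOA_TABLE)

assert change_æøå('grøn æske på lager') == 'groen aeske paa lager'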
From fc51e59546699bc6b986fcc9ed8873068d49b544 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:17:19 +0200
Subject: [PATCH 4/9] Move function "change_æøå" from "add_product.py" to "scraping.py"
- "add_product.py" now imports the function "change_æøå" from "scraping.py"
---
tech_scraping/add_product.py | 17 +----------------
tech_scraping/scraping.py | 15 +++++++++++++++
2 files changed, 16 insertions(+), 16 deletions(-)
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index c6b85ee7..98b49ca3 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -3,7 +3,7 @@
import requests
from bs4 import BeautifulSoup
import json
-from scraping import change_name
+from scraping import change_name, change_æøå
import argparse
@@ -101,21 +101,6 @@ def get_product_name(link):
return None
-def change_æøå(name):
- """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
- new_name = ''
- for letter in name:
- if letter in 'æøå':
- if letter == 'æ':
- letter = 'ae'
- elif letter == 'ø':
- letter = 'oe'
- elif letter == 'å':
- letter = 'aa'
- new_name += letter
- return new_name
-
-
def check_arguments():
"""Check if any of the optional domain arguments is giving to the script
and returns those that are as one json-object."""
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index 22cf632e..7af52d37 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -203,6 +203,21 @@ def change_name(name):
return name
+def change_æøå(name):
+ """Change the letters æ, ø and å to international letters to avoid unicode and return the new name."""
+ new_name = ''
+ for letter in name:
+ if letter in 'æøå':
+ if letter == 'æ':
+ letter = 'ae'
+ elif letter == 'ø':
+ letter = 'oe'
+ elif letter == 'å':
+ letter = 'aa'
+ new_name += letter
+ return new_name
+
+
class Komplett(Scraper):
def get_info(self):
self.name = self.html_soup.find('div', class_='product-main-info__info').h1.span.text.lower()
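
After the move there is a single definition in scraping.py that both files share. A minimal usage sketch from the add_product.py side, assuming the two files sit next to each other as in the diffstat:

from scraping import change_name, change_æøå

kategori = change_æøå('grafikkort på lager')
print(kategori)  # -> 'grafikkort paa lager'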
From 96d70b3f5cd4f234d58c4e9d9586ba470a688c17 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:26:56 +0200
Subject: [PATCH 5/9] Add function call to "change_æøå" in the same line as function "change_name"
---
tech_scraping/scraping.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index 7af52d37..f052ce74 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -44,7 +44,7 @@ def __init__(self, category, URL):
except Exception as err:
logger.error(f'Failed in method "{self.__class__.__name__}.get_info()": {err}', exc_info=True)
- self.name = change_name(self.name)
+ self.name = change_æøå(change_name(self.name))
self.date = str(datetime.today().strftime('%Y-%m-%d'))
self.get_part_num()
self.shorten_url()
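
Note the order of the composition: change_name runs first and the transliteration is applied to its output. A sketch with a made-up value (change_name's own rules live elsewhere in scraping.py):

from scraping import change_name, change_æøå

raw_name = 'asus geforce-grafikkort, blå'   # invented example
name = change_æøå(change_name(raw_name))    # normalize first, then æ/ø/å -> ae/oe/aa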
From 243333572a836455f3742af4303e6b1eee833c55 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Fri, 2 Oct 2020 23:39:47 +0200
Subject: [PATCH 6/9] Add ability to scrape and add product from Expert.dk #86
---
README.md | 4 +++-
tech_scraping/README.md | 4 +++-
tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
tech_scraping/scraping.py | 10 ++++++++++
4 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index a27e7074..66493b9c 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com and Power.dk**
+**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk and Expert.dk**
**The Fakta scraper can scrape this week's discounts.**
@@ -81,6 +81,8 @@ There is some optional arguments you can use when running add_product.py, these
- --power
+- --expert
+
When using one or more of the "domain" arguments, only the chosen domains get added to records.json under the product name.
diff --git a/tech_scraping/README.md b/tech_scraping/README.md
index b7b85503..781874a5 100644
--- a/tech_scraping/README.md
+++ b/tech_scraping/README.md
@@ -1,4 +1,4 @@
-**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com and Power.dk**
+**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk**
# Table of contents
- [First setup](#first-setup)
@@ -69,4 +69,6 @@ There is some optional arguments you can use when running add_product.py, these
- --power
+- --expert
+
When using one or more of the "domain" arguments, only the chosen domains get added to records.json under the product name.
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index 98b49ca3..f1590f7e 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -64,6 +64,11 @@ def argparse_setup():
'if this is the only optional flag',
action="store_true")
+    parser.add_argument('--expert',
+                        help='add only expert-domain under the product-name, '
+                             'if this is the only optional flag',
+                        action="store_true")
+
return parser.parse_args()
@@ -97,6 +102,8 @@ def get_product_name(link):
return change_name(html_soup.find('h1', class_='product-title').text.lower())
elif URL_domain == 'www.power.dk':
return change_name(html_soup.find('title').text.replace(' - Power.dk', '').lower())
+ elif URL_domain == 'www.expert.dk':
+ return change_name(html_soup.find('meta', property='og:title')['content'].lower())
else:
return None
@@ -105,7 +112,7 @@ def check_arguments():
"""Check if any of the optional domain arguments is giving to the script
and returns those that are as one json-object."""
json_object = json.loads('{}')
- if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power:
+ if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert:
if args.komplett:
json_object.update({
f"{komplett_domain}": {
@@ -196,6 +203,16 @@ def check_arguments():
"dates": {}
}
})
+ if args.expert:
+ json_object.update({
+ f"{expert_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
+ }
+ })
else:
json_object = {
f"{komplett_domain}": {
@@ -260,6 +277,13 @@ def check_arguments():
"url": ""
},
"dates": {}
+ },
+ f"{expert_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
}
}
return json_object
@@ -299,6 +323,8 @@ def find_domain(domain):
return 'eBay'
elif domain == 'www.power.dk':
return 'Power'
+ elif domain == 'www.expert.dk':
+ return 'Expert'
def add_to_scraper(kategori, link, url_domain):
@@ -337,5 +363,6 @@ def main(kategori, link):
amazon_domain = 'www.amazon.com'
ebay_domain = 'www.ebay.com'
power_domain = 'www.power.dk'
+ expert_domain = 'www.expert.dk'
args = argparse_setup()
main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index f052ce74..c8394828 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -95,6 +95,8 @@ def get_part_num(self):
self.part_num = self.URL.split('=')[1]
elif self.URL_domain == 'www.power.dk':
self.part_num = self.URL.split('/')[-2].replace('p-', '')
+ elif self.URL_domain == 'www.expert.dk':
+ self.part_num = self.URL.split('/')[-2].replace('p-', '')
def check_part_num(self):
"""
@@ -167,6 +169,8 @@ def shorten_url(self):
self.short_url = self.URL.split('?')[0]
elif self.URL_domain == 'www.power.dk':
self.short_url = f'https://www.power.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
+ elif self.URL_domain == 'www.expert.dk':
+ self.short_url = f'https://www.expert.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
def print_info(self):
"""Print info about the product in the terminal."""
@@ -285,6 +289,12 @@ def get_info(self):
self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0]
+class Expert(Scraper):
+ def get_info(self):
+ self.name = self.html_soup.find('meta', property='og:title')['content'].lower()
+ self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0]
+
+
if __name__ == '__main__':
logger = log_setup()
Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
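
Expert's name and price both come from meta tags rather than from visible markup. A self-contained sketch of that extraction against invented sample HTML:

from bs4 import BeautifulSoup

# Invented sample carrying the two meta tags the Expert scraper reads.
html = '''<head>
<meta property="og:title" content="Logitech G502 Mus">
<meta property="product:price:amount" content="449,00">
</head>'''

soup = BeautifulSoup(html, 'html.parser')
name = soup.find('meta', property='og:title')['content'].lower()
price = soup.find('meta', property='product:price:amount')['content'].split(',')[0]
print(name, price)  # -> logitech g502 mus 449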
From 16718c0022e4a38d446385315c05137a77830955 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 3 Oct 2020 00:05:28 +0200
Subject: [PATCH 7/9] Add ability to scrape and add product from mm-vision.dk #88
---
tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
tech_scraping/scraping.py | 10 ++++++++++
2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index f1590f7e..7f427912 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -69,6 +69,11 @@ def argparse_setup():
'if this is the only optional flag',
action="store_true")
+    parser.add_argument('--mmvision',
+                        help='add only mm-vision-domain under the product-name, '
+                             'if this is the only optional flag',
+                        action="store_true")
+
return parser.parse_args()
@@ -104,6 +109,8 @@ def get_product_name(link):
return change_name(html_soup.find('title').text.replace(' - Power.dk', '').lower())
elif URL_domain == 'www.expert.dk':
return change_name(html_soup.find('meta', property='og:title')['content'].lower())
+ elif URL_domain == 'www.mm-vision.dk':
+ return change_name(html_soup.find('h1', itemprop='name').text.strip().lower())
else:
return None
@@ -112,7 +119,7 @@ def check_arguments():
"""Check if any of the optional domain arguments is giving to the script
and returns those that are as one json-object."""
json_object = json.loads('{}')
- if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert:
+ if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision:
if args.komplett:
json_object.update({
f"{komplett_domain}": {
@@ -213,6 +220,16 @@ def check_arguments():
"dates": {}
}
})
+ if args.mmvision:
+ json_object.update({
+ f"{mmvision_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
+ }
+ })
else:
json_object = {
f"{komplett_domain}": {
@@ -284,6 +301,13 @@ def check_arguments():
"url": ""
},
"dates": {}
+ },
+ f"{mmvision_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
}
}
return json_object
@@ -325,6 +349,8 @@ def find_domain(domain):
return 'Power'
elif domain == 'www.expert.dk':
return 'Expert'
+ elif domain == 'www.mm-vision.dk':
+ return 'MMVision'
def add_to_scraper(kategori, link, url_domain):
@@ -364,5 +390,6 @@ def main(kategori, link):
ebay_domain = 'www.ebay.com'
power_domain = 'www.power.dk'
expert_domain = 'www.expert.dk'
+ mmvision_domain = 'www.mm-vision.dk'
args = argparse_setup()
main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index c8394828..ae2d2719 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -97,6 +97,8 @@ def get_part_num(self):
self.part_num = self.URL.split('/')[-2].replace('p-', '')
elif self.URL_domain == 'www.expert.dk':
self.part_num = self.URL.split('/')[-2].replace('p-', '')
+ elif self.URL_domain == 'www.mm-vision.dk':
+ self.part_num = self.html_soup.find('input', type='radio')['value']
def check_part_num(self):
"""
@@ -171,6 +173,8 @@ def shorten_url(self):
self.short_url = f'https://www.power.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
elif self.URL_domain == 'www.expert.dk':
self.short_url = f'https://www.expert.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
+ elif self.URL_domain == 'www.mm-vision.dk':
+ self.short_url = self.URL
def print_info(self):
"""Print info about the product in the terminal."""
@@ -295,6 +299,12 @@ def get_info(self):
self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split(',')[0]
+class MMVision(Scraper):
+ def get_info(self):
+ self.name = self.html_soup.find('h1', itemprop='name').text.strip().lower()
+ self.price = self.html_soup.find('h3', class_='product-price text-right').text.replace(',-', '').replace('.', '')
+
+
if __name__ == '__main__':
logger = log_setup()
Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
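
MM-Vision formats prices in the Danish '4.999,-' style, and the new get_info strips both the thousands separator and the ',-' suffix. A quick check of that cleanup on an invented value:

price_text = '4.999,-'   # invented sample in MM-Vision's display format
price = price_text.replace(',-', '').replace('.', '')
assert price == '4999'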
From 8ea535186eed6f2e5ab89c5884f23f7b4d6c6681 Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 3 Oct 2020 00:35:41 +0200
Subject: [PATCH 8/9] Add ability to scrape and add product from Coolshop.dk #89
- also add missing mm-vision to the READMEs
---
README.md | 6 +++++-
tech_scraping/README.md | 6 +++++-
tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
tech_scraping/scraping.py | 10 ++++++++++
4 files changed, 48 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index 66493b9c..f5ae52cc 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk and Expert.dk**
+**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
**The Fakta scraper can scrape this week's discounts.**
@@ -83,6 +83,10 @@ There is some optional arguments you can use when running add_product.py, these
- --expert
+- --mmvision
+
+- --coolshop
+
When using one or more of the "domain" arguments, only the chosen domains get added to records.json under the product name.
diff --git a/tech_scraping/README.md b/tech_scraping/README.md
index 781874a5..b5f86756 100644
--- a/tech_scraping/README.md
+++ b/tech_scraping/README.md
@@ -1,4 +1,4 @@
-**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk**
+**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
# Table of contents
- [First setup](#first-setup)
@@ -71,4 +71,8 @@ There is some optional arguments you can use when running add_product.py, these
- --expert
+- --mmvision
+
+- --coolshop
+
When using one or more of the "domain" arguments, only the chosen domains get added to records.json under the product name.
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index 7f427912..e1f48ff9 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -74,6 +74,11 @@ def argparse_setup():
'if this is the only optional flag',
action="store_true")
+    parser.add_argument('--coolshop',
+                        help='add only coolshop-domain under the product-name, '
+                             'if this is the only optional flag',
+                        action="store_true")
+
return parser.parse_args()
@@ -111,6 +116,8 @@ def get_product_name(link):
return change_name(html_soup.find('meta', property='og:title')['content'].lower())
elif URL_domain == 'www.mm-vision.dk':
return change_name(html_soup.find('h1', itemprop='name').text.strip().lower())
+ elif URL_domain == 'www.coolshop.dk':
+ return change_name(html_soup.find('div', class_='thing-header').text.strip().lower())
else:
return None
@@ -119,7 +126,7 @@ def check_arguments():
"""Check if any of the optional domain arguments is giving to the script
and returns those that are as one json-object."""
json_object = json.loads('{}')
- if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision:
+ if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop:
if args.komplett:
json_object.update({
f"{komplett_domain}": {
@@ -230,6 +237,16 @@ def check_arguments():
"dates": {}
}
})
+ if args.coolshop:
+ json_object.update({
+ f"{coolshop_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
+ }
+ })
else:
json_object = {
f"{komplett_domain}": {
@@ -308,6 +325,13 @@ def check_arguments():
"url": ""
},
"dates": {}
+ },
+ f"{coolshop_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
}
}
return json_object
@@ -351,6 +375,8 @@ def find_domain(domain):
return 'Expert'
elif domain == 'www.mm-vision.dk':
return 'MMVision'
+ elif domain == 'www.coolshop.dk':
+ return 'Coolshop'
def add_to_scraper(kategori, link, url_domain):
@@ -391,5 +417,6 @@ def main(kategori, link):
power_domain = 'www.power.dk'
expert_domain = 'www.expert.dk'
mmvision_domain = 'www.mm-vision.dk'
+ coolshop_domain = 'www.coolshop.dk'
args = argparse_setup()
main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index ae2d2719..59808774 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -99,6 +99,8 @@ def get_part_num(self):
self.part_num = self.URL.split('/')[-2].replace('p-', '')
elif self.URL_domain == 'www.mm-vision.dk':
self.part_num = self.html_soup.find('input', type='radio')['value']
+ elif self.URL_domain == 'www.coolshop.dk':
+ self.part_num = self.html_soup.find_all('div', id='attributeSku')[1].text.strip()
def check_part_num(self):
"""
@@ -175,6 +177,8 @@ def shorten_url(self):
self.short_url = f'https://www.expert.dk/{self.URL.split("/")[3]}/p-{self.part_num}'
elif self.URL_domain == 'www.mm-vision.dk':
self.short_url = self.URL
+ elif self.URL_domain == 'www.coolshop.dk':
+ self.short_url = f'https://www.coolshop.dk/produkt/{self.URL.split("/")[-2]}/'
def print_info(self):
"""Print info about the product in the terminal."""
@@ -305,6 +309,12 @@ def get_info(self):
self.price = self.html_soup.find('h3', class_='product-price text-right').text.replace(',-', '').replace('.', '')
+class Coolshop(Scraper):
+ def get_info(self):
+ self.name = self.html_soup.find('div', class_='thing-header').text.strip().lower()
+ self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split('.')[0]
+
+
if __name__ == '__main__':
logger = log_setup()
Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
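
The [1] index in the new get_part_num implies Coolshop's pages repeat id="attributeSku" and that the SKU sits in the second occurrence. A sketch of that lookup against invented markup built on the same assumption:

from bs4 import BeautifulSoup

html = '''
<div id="attributeSku">placeholder</div>
<div id="attributeSku">CS-12345</div>
'''

soup = BeautifulSoup(html, 'html.parser')
part_num = soup.find_all('div', id='attributeSku')[1].text.strip()
print(part_num)  # -> CS-12345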
From da780a3780d68476bc0fca398705d5c7f604743c Mon Sep 17 00:00:00 2001
From: Crinibus <57172157+Crinibus@users.noreply.github.com>
Date: Sat, 3 Oct 2020 01:45:02 +0200
Subject: [PATCH 9/9] Add ability to scrape and add product from Sharkgaming.dk #90
---
README.md | 4 +++-
tech_scraping/README.md | 4 +++-
tech_scraping/add_product.py | 29 ++++++++++++++++++++++++++++-
tech_scraping/scraping.py | 10 ++++++++++
4 files changed, 44 insertions(+), 3 deletions(-)
diff --git a/README.md b/README.md
index f5ae52cc..1cf3ce66 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
+**The tech scraper can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk**
**The Fakta scraper can scrape this week's discounts.**
@@ -87,6 +87,8 @@ There is some optional arguments you can use when running add_product.py, these
- --coolshop
+- --sharkgaming
+
When using one or more of the "domain" arguments, only the chosen domains get added to records.json under the product name.
diff --git a/tech_scraping/README.md b/tech_scraping/README.md
index b5f86756..b992bfba 100644
--- a/tech_scraping/README.md
+++ b/tech_scraping/README.md
@@ -1,4 +1,4 @@
-**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk and Coolshop.dk**
+**This program can scrape prices on products from Komplett.dk, Proshop.dk, Computersalg.dk, Elgiganten.dk, AvXperten.dk, Av-Cables.dk, Amazon.com, eBay.com, Power.dk, Expert.dk, MM-Vision.dk, Coolshop.dk and Sharkgaming.dk**
# Table of contents
- [First setup](#first-setup)
@@ -75,4 +75,6 @@ There is some optional arguments you can use when running add_product.py, these
- --coolshop
+- --sharkgaming
+
When using one or more of the "domain" arguments, only the chosen domains get added to records.json under the product name.
diff --git a/tech_scraping/add_product.py b/tech_scraping/add_product.py
index e1f48ff9..e4670866 100644
--- a/tech_scraping/add_product.py
+++ b/tech_scraping/add_product.py
@@ -79,6 +79,11 @@ def argparse_setup():
'if this is the only optional flag',
action="store_true")
+    parser.add_argument('--sharkgaming',
+                        help='add only sharkgaming-domain under the product-name, '
+                             'if this is the only optional flag',
+                        action="store_true")
+
return parser.parse_args()
@@ -118,6 +123,8 @@ def get_product_name(link):
return change_name(html_soup.find('h1', itemprop='name').text.strip().lower())
elif URL_domain == 'www.coolshop.dk':
return change_name(html_soup.find('div', class_='thing-header').text.strip().lower())
+ elif URL_domain == 'www.sharkgaming.dk':
+ return change_name(html_soup.find('div', class_='product-name').text.strip().lower())
else:
return None
@@ -126,7 +133,7 @@ def check_arguments():
"""Check if any of the optional domain arguments is giving to the script
and returns those that are as one json-object."""
json_object = json.loads('{}')
- if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop:
+ if args.komplett or args.proshop or args.computersalg or args.elgiganten or args.avxperten or args.avcables or args.amazon or args.ebay or args.power or args.expert or args.mmvision or args.coolshop or args.sharkgaming:
if args.komplett:
json_object.update({
f"{komplett_domain}": {
@@ -247,6 +254,16 @@ def check_arguments():
"dates": {}
}
})
+ if args.sharkgaming:
+ json_object.update({
+ f"{sharkgaming_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
+ }
+ })
else:
json_object = {
f"{komplett_domain}": {
@@ -332,6 +349,13 @@ def check_arguments():
"url": ""
},
"dates": {}
+ },
+ f"{sharkgaming_domain}": {
+ "info": {
+ "part_num": "",
+ "url": ""
+ },
+ "dates": {}
}
}
return json_object
@@ -377,6 +401,8 @@ def find_domain(domain):
return 'MMVision'
elif domain == 'www.coolshop.dk':
return 'Coolshop'
+ elif domain == 'www.sharkgaming.dk':
+ return 'Sharkgaming'
def add_to_scraper(kategori, link, url_domain):
@@ -418,5 +444,6 @@ def main(kategori, link):
expert_domain = 'www.expert.dk'
mmvision_domain = 'www.mm-vision.dk'
coolshop_domain = 'www.coolshop.dk'
+ sharkgaming_domain = 'www.sharkgaming.dk'
args = argparse_setup()
main(args.category, args.url)
diff --git a/tech_scraping/scraping.py b/tech_scraping/scraping.py
index 59808774..a72080fe 100644
--- a/tech_scraping/scraping.py
+++ b/tech_scraping/scraping.py
@@ -101,6 +101,8 @@ def get_part_num(self):
self.part_num = self.html_soup.find('input', type='radio')['value']
elif self.URL_domain == 'www.coolshop.dk':
self.part_num = self.html_soup.find_all('div', id='attributeSku')[1].text.strip()
+ elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk':
+ self.part_num = 'Non existing on Sharkgaming'
def check_part_num(self):
"""
@@ -179,6 +181,8 @@ def shorten_url(self):
self.short_url = self.URL
elif self.URL_domain == 'www.coolshop.dk':
self.short_url = f'https://www.coolshop.dk/produkt/{self.URL.split("/")[-2]}/'
+ elif self.URL_domain == 'www.sharkgaming.dk' or self.URL_domain == 'sharkgaming.dk':
+ self.short_url = self.URL
def print_info(self):
"""Print info about the product in the terminal."""
@@ -315,6 +319,12 @@ def get_info(self):
self.price = self.html_soup.find('meta', property='product:price:amount')['content'].split('.')[0]
+class Sharkgaming(Scraper):
+ def get_info(self):
+ self.name = self.html_soup.find('div', class_='product-name').text.strip().lower()
+ self.price = self.html_soup.find('span', class_='price').text.replace(' kr.', '').replace('.', '')
+
+
if __name__ == '__main__':
logger = log_setup()
Komplett('ssd', 'https://www.komplett.dk/product/1133452/hardware/lagring/harddiskssd/ssd-m2/corsair-force-series-mp600-1tb-m2-ssd#')
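
Every shop added in this series extends the same if/elif ladders in get_part_num, shorten_url and find_domain. The find_domain branches visible in these hunks reduce to a dict lookup; a sketch of that alternative (domains from earlier commits omitted, since their labels do not appear here):

DOMAIN_NAMES = {
    'www.ebay.com': 'eBay',
    'www.power.dk': 'Power',
    'www.expert.dk': 'Expert',
    'www.mm-vision.dk': 'MMVision',
    'www.coolshop.dk': 'Coolshop',
    'www.sharkgaming.dk': 'Sharkgaming',
}

def find_domain(domain):
    return DOMAIN_NAMES.get(domain)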