Skip to content

Commit

Permalink
Merge pull request #199 from Crinibus/error-when-url-is-missing-schema
Browse files Browse the repository at this point in the history
Raise exception when url has no schema
  • Loading branch information
Crinibus authored Nov 18, 2022
2 parents 0f412a3 + 9938cf6 commit fe12ce7
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 4 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ This is equivalent to the above:
python3 main.py -a -c <category> <category2> -u <url> <url2>
```

**OBS**: The url must have the ```https://``` part.<br/>
**OBS**: The url must have a schema like: ```https://``` or ```http://```.<br/>
**OBS**: If an error occurs when adding a product, it may be because the url contains a ```&```. In that case, put quotation marks around the url; this should solve the problem. If it doesn't, please submit an issue.<br/>

<br/>
Expand Down
12 changes: 10 additions & 2 deletions scraper/add_product.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,17 @@
from typing import List
import logging
from scraper.exceptions import WebsiteNotSupported
from scraper.exceptions import WebsiteNotSupported, URLMissingSchema
from scraper.scrape import Scraper
from scraper.filemanager import Filemanager
from scraper.domains import get_website_name, SUPPORTED_DOMAINS
from scraper.constants import URL_SCHEMES


def add_products(categories: List[str], urls: List[str]) -> None:
    """Add each (category, url) pair as a product, reporting rejected urls.

    Categories and urls are paired positionally with ``zip``, so surplus
    entries in the longer list are ignored.  A url rejected with
    ``WebsiteNotSupported`` or ``URLMissingSchema`` is logged and printed,
    and processing continues with the next pair.
    """
    logger = logging.getLogger(__name__)

    for category, url in zip(categories, urls):
        try:
            add_product(category, url)
        except (WebsiteNotSupported, URLMissingSchema) as err:
            logger.error(err)
            print(err)

Expand All @@ -23,6 +24,9 @@ def add_product(category: str, url: str) -> None:
if website_name not in SUPPORTED_DOMAINS.keys():
raise WebsiteNotSupported(website_name)

if is_missing_url_schema(url):
raise URLMissingSchema(url)

print(f"Adding product with category '{category}' and url '{url}'")
logger.info(f"Adding product with category '{category}' and url '{url}'")

Expand Down Expand Up @@ -98,3 +102,7 @@ def check_if_product_exists_csv(product: Scraper) -> bool:
return True

return False


def is_missing_url_schema(url: str) -> bool:
    """Return True when ``url`` does not begin with one of ``URL_SCHEMES``.

    The scheme has to be a prefix of the url: a scheme that only appears
    later (for example inside a query parameter such as
    ``example.com/?next=https://other.com``) does not count.
    """
    # str.startswith accepts a tuple of candidate prefixes.
    return not url.startswith(tuple(URL_SCHEMES))
3 changes: 2 additions & 1 deletion scraper/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@

REQUEST_COOKIES = {"cookies_are": "working"}


WEBSITE_COLORS = {
"komplett": "orange",
"proshop": "red",
Expand All @@ -23,3 +22,5 @@
"newegg": "#f7c20a",
"hifiklubben": "#231f20",
}

# Scheme prefixes recognised when checking that a product url includes a scheme.
URL_SCHEMES = ("http://", "https://")
12 changes: 12 additions & 0 deletions scraper/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,19 @@
from scraper.constants import URL_SCHEMES


class WebsiteNotSupported(Exception):
    """Raised for a website name that is not among the supported domains.

    Attributes:
        website_name: the unsupported website name the error refers to.
    """

    def __init__(self, website_name: str, *args: object) -> None:
        # Pass website_name on to Exception so it is part of ``self.args``;
        # copy/pickle rebuild an exception by calling the class with ``args``
        # and would otherwise fail on the missing positional argument.
        super().__init__(website_name, *args)
        self.website_name = website_name

    def __str__(self) -> str:
        return f"Website '{self.website_name}' is currently not supported"


class URLMissingSchema(Exception):
    """Raised for a url that lacks one of the schemes in ``URL_SCHEMES``.

    Attributes:
        url: the offending url, echoed back in the error message.
    """

    def __init__(self, url: str, *args: object) -> None:
        # Pass url on to Exception so it is part of ``self.args``; copy/pickle
        # rebuild an exception by calling the class with ``args`` and would
        # otherwise fail on the missing positional argument.
        super().__init__(url, *args)
        self.url = url

    def __str__(self) -> str:
        schemes = ", ".join(URL_SCHEMES)
        return f"Missing schema in url '{self.url}'. Consider prefixing the url with one of following schemes: {schemes}"

0 comments on commit fe12ce7

Please sign in to comment.