From 0ca786fb963d70980aed5f73d2c1579aa3e8bfb7 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Mon, 11 Dec 2023 04:22:00 +0000 Subject: [PATCH 1/3] add additional case for scraped image parsing --- mealie/services/scraper/cleaner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/mealie/services/scraper/cleaner.py b/mealie/services/scraper/cleaner.py index f4bfc0ad708..f157c4d9c6c 100644 --- a/mealie/services/scraper/cleaner.py +++ b/mealie/services/scraper/cleaner.py @@ -108,6 +108,8 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image return [image] case [str(_), *_]: return [x for x in image if x] # Only return non-null strings in list + case [{"@id": str(_)}, *_]: + return [x["@id"] for x in image] case [{"url": str(_)}, *_]: return [x["url"] for x in image] case {"url": str(image)}: From f4e0c6ec5560e3968b78ab81b469d720954ea654 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Mon, 11 Dec 2023 04:23:11 +0000 Subject: [PATCH 2/3] made scraper more fault tolerant for missing images --- mealie/services/scraper/cleaner.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mealie/services/scraper/cleaner.py b/mealie/services/scraper/cleaner.py index f157c4d9c6c..b5382ad85ba 100644 --- a/mealie/services/scraper/cleaner.py +++ b/mealie/services/scraper/cleaner.py @@ -115,7 +115,8 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image case {"url": str(image)}: return [image] case _: - raise TypeError(f"Unexpected type for image: {type(image)}, {image}") + logger.exception(f"Unexpected type for image: {type(image)}, {image}") + return [default] def clean_instructions(steps_object: list | dict | str, default: list | None = None) -> list[dict]: From c07e0c33a517e5a2dc179f944b9e141256a5c7e4 Mon Sep 17 00:00:00 2001 From: Michael Genson <71845777+michael-genson@users.noreply.github.com> Date: Mon, 11 Dec 2023 04:25:22 +0000 Subject: [PATCH 3/3] re-ordered case to favor better implementations --- mealie/services/scraper/cleaner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/mealie/services/scraper/cleaner.py b/mealie/services/scraper/cleaner.py index b5382ad85ba..728fe83642d 100644 --- a/mealie/services/scraper/cleaner.py +++ b/mealie/services/scraper/cleaner.py @@ -89,10 +89,10 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image image attempts to parse the image field from a recipe and return a string. Currenty Supported Structures: - - `https://exmaple.com` - A string - - `{ "url": "https://exmaple.com" }` - A dictionary with a `url` key - - `["https://exmaple.com"]` - A list of strings - - `[{ "url": "https://exmaple.com" }]` - A list of dictionaries with a `url` key + - `https://example.com` - A string + - `{ "url": "https://example.com" }` - A dictionary with a `url` key + - `["https://example.com"]` - A list of strings + - `[{ "url": "https://example.com" }]` - A list of dictionaries with a `url` key Raises: TypeError: If the image field is not a supported type a TypeError is raised. @@ -108,12 +108,12 @@ def clean_image(image: str | list | dict | None = None, default: str = "no image return [image] case [str(_), *_]: return [x for x in image if x] # Only return non-null strings in list - case [{"@id": str(_)}, *_]: - return [x["@id"] for x in image] case [{"url": str(_)}, *_]: return [x["url"] for x in image] case {"url": str(image)}: return [image] + case [{"@id": str(_)}, *_]: + return [x["@id"] for x in image] case _: logger.exception(f"Unexpected type for image: {type(image)}, {image}") return [default]