From 8ddfebe134860a9c0b23f74e3e1c3823357b42c8 Mon Sep 17 00:00:00 2001 From: Corentin Garcia Date: Sat, 17 Jun 2023 22:45:51 +0200 Subject: [PATCH] fix: fix some media issues --- eddrit/__init__.py | 2 +- eddrit/const.py | 10 ----- eddrit/reddit/content_parser/flair.py | 12 +++--- eddrit/reddit/content_parser/media.py | 25 +++++-------- eddrit/reddit/content_parser/video_parsers.py | 37 ++++++++++++++++--- eddrit/reddit/parser.py | 21 +++++++---- eddrit/utils/media.py | 23 ------------ eddrit/utils/urls.py | 7 ++++ pyproject.toml | 2 +- templates/macros/comments.html | 6 +-- templates/macros/post.html | 2 +- tests/utils/test_media.py | 14 ------- 12 files changed, 72 insertions(+), 89 deletions(-) delete mode 100644 eddrit/const.py delete mode 100644 eddrit/utils/media.py create mode 100644 eddrit/utils/urls.py delete mode 100644 tests/utils/test_media.py diff --git a/eddrit/__init__.py b/eddrit/__init__.py index e12f457..480b0e4 100644 --- a/eddrit/__init__.py +++ b/eddrit/__init__.py @@ -2,7 +2,7 @@ from loguru import logger from eddrit import config -__version__ = "0.5.6" +__version__ = "0.5.7" logger.remove() logger.add(sys.stderr, level=config.LOG_LEVEL) diff --git a/eddrit/const.py b/eddrit/const.py deleted file mode 100644 index 2c3fa64..0000000 --- a/eddrit/const.py +++ /dev/null @@ -1,10 +0,0 @@ -# List of image hosting domains. -# Used to force parsing as media -# if the post is a link post instead of a media post. -MEDIA_HOSTING_DOMAINS = ["imgur.com"] - -# List of domains that have a special handling for their embed -# and should not be parsed with the generic embed code -DOMAINS_WITH_SPECIAL_EMBED_HANDLING = ["twitch.tv"] - -STATIC_RES_PATH_REPLACEMENT = "$STATIC_RES_PATH" diff --git a/eddrit/reddit/content_parser/flair.py b/eddrit/reddit/content_parser/flair.py index 7a921e2..37d147c 100644 --- a/eddrit/reddit/content_parser/flair.py +++ b/eddrit/reddit/content_parser/flair.py @@ -10,7 +10,7 @@ def get_post_flair(api_post_data: Dict[Hashable, Any]) -> Optional[models.Flair] text_color = ( "black" if api_post_data["link_flair_text_color"] == "dark" else "white" ) - bg_color = api_post_data["link_flair_background_color"] + bg_color = api_post_data["link_flair_background_color"] or "lightblue" if api_post_data.get("is_original_content", False): flair_components.append( @@ -55,14 +55,12 @@ def get_user_flair(api_post_data: Dict[Hashable, Any]) -> Optional[models.Flair] flair_components = [] # Background color - bg_color = api_post_data["author_flair_background_color"] - if not bg_color or bg_color == "#ffffff": - bg_color = "#dadada" + bg_color = api_post_data["author_flair_background_color"] or "lightblue" # Text color - text_color = api_post_data["author_flair_text_color"] - if not text_color: - text_color = "#0000" + text_color = ( + "black" if api_post_data["author_flair_text_color"] == "dark" else "white" + ) if api_post_data.get("author_flair_richtext"): for part in api_post_data.get("author_flair_richtext", []): diff --git a/eddrit/reddit/content_parser/media.py b/eddrit/reddit/content_parser/media.py index e9b06b7..5e15431 100644 --- a/eddrit/reddit/content_parser/media.py +++ b/eddrit/reddit/content_parser/media.py @@ -4,18 +4,16 @@ from eddrit import models -from eddrit.utils.media import ( - post_is_from_domain, -) from loguru import logger from eddrit.reddit.content_parser import video_parsers +from eddrit.utils.urls import get_domain_and_suffix_from_url def _post_is_an_imgur_gif(api_post_data: Dict[Hashable, Any]) -> bool: """Check if a post is an imgur gif by checking domain and url file extension.""" return ( - post_is_from_domain(api_post_data["domain"], "imgur.com") + get_domain_and_suffix_from_url(api_post_data["domain"]) == "imgur.com" and ".gif" in api_post_data["url"] ) @@ -111,19 +109,14 @@ def get_post_video_content( video_parsers.get_reddit_video_preview, ] - # Special case for twitch, the embedly embed - # Content-Security-Policy prevents including it - if post_is_from_domain(api_post_data["domain"], "twitch.tv"): - parsers.append(video_parsers.get_twitch_embed) + post_domain = get_domain_and_suffix_from_url(api_post_data["domain"]) - # Special case for imgur gif/gifv, it's easier to get the mp4 directly from the URL - if _post_is_an_imgur_gif(api_post_data): - parsers.append(video_parsers.get_imgur_gif) - - # Special case for gfycat, some old links are not embed - # but it can be converted to it. - if post_is_from_domain(api_post_data["domain"], "gfycat.com"): - parsers.append(video_parsers.get_gfycat_embed) + # Special case for some embeds + domains_with_special_embed_handling = ( + video_parsers.get_domains_with_special_embed_handling() + ) + if post_domain in domains_with_special_embed_handling.keys(): + parsers.append(domains_with_special_embed_handling[post_domain]) parsed_results: list[models.PostVideo | models.EmbedPostContent] = [] for parser in parsers: diff --git a/eddrit/reddit/content_parser/video_parsers.py b/eddrit/reddit/content_parser/video_parsers.py index 5cc896b..fea566f 100644 --- a/eddrit/reddit/content_parser/video_parsers.py +++ b/eddrit/reddit/content_parser/video_parsers.py @@ -1,13 +1,20 @@ import html -from typing import Any, Dict, Hashable +import re +from typing import Any, Callable, Dict, Hashable import lxml.html +import tldextract from eddrit import models -from eddrit.utils.media import domain_has_special_embed_handling from eddrit.utils.middlewares import get_current_host +def _domain_has_special_embed_handling(domain: str) -> bool: + """Check if the given domain is a domain that has a special code for embed handling.""" + _, domain, suffix = tldextract.extract(domain) + return f"{domain}.{suffix}" in get_domains_with_special_embed_handling().keys() + + def _cleanup_embed(content: str) -> str: """Cleanup embed content for embed posts""" content_parsed = lxml.html.fromstring(content) @@ -20,13 +27,31 @@ def _cleanup_embed(content: str) -> str: return lxml.html.tostring(content_parsed).decode("utf-8") +def get_domains_with_special_embed_handling() -> dict[str, Callable]: + """Return dict of domain associated with parsing function + of domains that have a special handling for their embed + and should not be parsed with the generic embed code""" + return {"twitch.tv": get_twitch_embed, "gfycat.com": get_gfycat_embed} + + def get_twitch_embed(api_post_data: Dict[Hashable, Any]) -> models.EmbedPostContent: """Fetch twitch embed directly as the one in the API has a Content-Security-Policy preventing including it. """ - embed_url = api_post_data["url"].replace( - "clips.twitch.tv/", "clips.twitch.tv/embed?clip=" - ) + + # There are two format of clips URLs + if "clips.twitch.tv" in api_post_data: + embed_url = api_post_data["url"].replace( + "clips.twitch.tv/", "clips.twitch.tv/embed?clip=" + ) + elif regex_clip_id := re.search( + "https://www.twitch.tv/.*/clip/(.*)", api_post_data["url"] + ): + clip_id = regex_clip_id.groups(0)[0] + embed_url = f"https://clips.twitch.tv/embed?clip={clip_id}" + else: + raise ValueError("Cannot parse Twitch embed") + parent = get_current_host() embed_code = f'' return models.EmbedPostContent( @@ -71,7 +96,7 @@ def get_imgur_gif(api_post_data: Dict[Hashable, Any]) -> models.PostVideo: def get_embed_content(api_post_data: Dict[Hashable, Any]) -> models.EmbedPostContent: - if domain_has_special_embed_handling(api_post_data["url"]): + if _domain_has_special_embed_handling(api_post_data["url"]): raise ValueError("The post domain cannot be parsed with get_embed_content") embed_data = api_post_data["secure_media"]["oembed"] diff --git a/eddrit/reddit/parser.py b/eddrit/reddit/parser.py index 246dc3d..181f5c2 100644 --- a/eddrit/reddit/parser.py +++ b/eddrit/reddit/parser.py @@ -5,7 +5,6 @@ import timeago from eddrit import models -from eddrit.const import STATIC_RES_PATH_REPLACEMENT from eddrit.reddit.content_parser.flair import get_post_flair, get_user_flair from eddrit.reddit.content_parser.media import ( get_post_gallery_content, @@ -14,7 +13,16 @@ post_has_video_content, ) from eddrit.utils.math import pretty_big_num -from eddrit.utils.media import is_media_hosting_domain +from eddrit.utils.urls import get_domain_and_suffix_from_url + +# Constant used in templates to be replaced by the static path +STATIC_RES_PATH_REPLACEMENT = "$STATIC_RES_PATH" + +# Domains that may be used in post of type link but that are majorly used for image hosting and should be parsed as such +IMAGE_HOSTING_DOMAINS = ["imgur.com"] + +# Media domains to display as links (embed that cannot be displayed, scripts needed etc.) +MEDIA_DOMAINS_TO_DISPLAY_AS_LINK = ["tiktok.com"] def get_post_content(api_post_data: Dict[Hashable, Any]) -> models.PostContentBase: @@ -35,13 +43,12 @@ def get_post_content(api_post_data: Dict[Hashable, Any]) -> models.PostContentBa # Media posts hint = api_post_data.get("post_hint") has_video_content = post_has_video_content(api_post_data) + post_domain = get_domain_and_suffix_from_url(api_post_data["domain"]) if ( - hint == "image" - or hint == "hosted:video" - or hint == "rich:video" - or (hint == "link" and is_media_hosting_domain(api_post_data["domain"])) + hint in ["image", "hosted:video", "rich:video"] + or (hint == "link" and post_domain in IMAGE_HOSTING_DOMAINS) or has_video_content - ): + ) and post_domain not in MEDIA_DOMAINS_TO_DISPLAY_AS_LINK: # Check if image has video (then consider video) else consider image if has_video_content: return get_post_video_content(api_post_data) diff --git a/eddrit/utils/media.py b/eddrit/utils/media.py deleted file mode 100644 index 355138b..0000000 --- a/eddrit/utils/media.py +++ /dev/null @@ -1,23 +0,0 @@ -import tldextract - -from eddrit import const - - -def is_media_hosting_domain(image_domain: str) -> bool: - """ - Check if the given domain is known to be an media hosting domain like imgur. - """ - _, domain, suffix = tldextract.extract(image_domain) - return f"{domain}.{suffix}" in const.MEDIA_HOSTING_DOMAINS - - -def post_is_from_domain(post_domain: str, domain_to_check: str) -> bool: - """Check if a post is from a given domain""" - _, domain, suffix = tldextract.extract(post_domain) - return f"{domain}.{suffix}" == domain_to_check - - -def domain_has_special_embed_handling(domain: str) -> bool: - """Check if the given domain is a domain that has a special code for embed handling.""" - _, domain, suffix = tldextract.extract(domain) - return f"{domain}.{suffix}" in const.DOMAINS_WITH_SPECIAL_EMBED_HANDLING diff --git a/eddrit/utils/urls.py b/eddrit/utils/urls.py new file mode 100644 index 0000000..5792624 --- /dev/null +++ b/eddrit/utils/urls.py @@ -0,0 +1,7 @@ +import tldextract + + +def get_domain_and_suffix_from_url(url: str) -> str: + """Get domain name and suffix from url""" + _, domain, suffix = tldextract.extract(url) + return f"{domain}.{suffix}" diff --git a/pyproject.toml b/pyproject.toml index 081a0ce..cf05d31 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "eddrit" -version = "0.5.6" +version = "0.5.7" description = "Alternative Reddit frontend" authors = ["corenting "] license = "MIT" diff --git a/templates/macros/comments.html b/templates/macros/comments.html index b4e79ed..b4763ab 100644 --- a/templates/macros/comments.html +++ b/templates/macros/comments.html @@ -3,21 +3,21 @@ {% macro render_author(comment, subreddit_name) %} {% set author_tag = '' %} {% if comment.is_submitter %} - + {{ comment.author.name }} {% set author_tag = '[S]' %} {% elif comment.is_admin %} - + {{ comment.author.name }} {% set author_tag = '[A]' %} {% elif comment.is_moderator %} - + {{ comment.author.name }} diff --git a/templates/macros/post.html b/templates/macros/post.html index d7e8bd8..79ec4b6 100644 --- a/templates/macros/post.html +++ b/templates/macros/post.html @@ -122,7 +122,7 @@ {% endif %} {% if post.content.type.value == 'video' %} -
diff --git a/tests/utils/test_media.py b/tests/utils/test_media.py deleted file mode 100644 index c5539a3..0000000 --- a/tests/utils/test_media.py +++ /dev/null @@ -1,14 +0,0 @@ -import pytest - -from eddrit.utils import media - - -@pytest.mark.parametrize( - "domain,expected", - [ - ("imgur.com", True), - ("github.com", False), - ], -) -def test_is_image_or_video_host(domain: str, expected: bool) -> None: - assert media.is_media_hosting_domain(domain) == expected