From 8ddfebe134860a9c0b23f74e3e1c3823357b42c8 Mon Sep 17 00:00:00 2001
From: Corentin Garcia <corenting@gmail.com>
Date: Sat, 17 Jun 2023 22:45:51 +0200
Subject: [PATCH] fix: fix some media issues

---
 eddrit/__init__.py                            |  2 +-
 eddrit/const.py                               | 10 -----
 eddrit/reddit/content_parser/flair.py         | 12 +++---
 eddrit/reddit/content_parser/media.py         | 25 +++++--------
 eddrit/reddit/content_parser/video_parsers.py | 37 ++++++++++++++++---
 eddrit/reddit/parser.py                       | 21 +++++++----
 eddrit/utils/media.py                         | 23 ------------
 eddrit/utils/urls.py                          |  7 ++++
 pyproject.toml                                |  2 +-
 templates/macros/comments.html                |  6 +--
 templates/macros/post.html                    |  2 +-
 tests/utils/test_media.py                     | 14 -------
 12 files changed, 72 insertions(+), 89 deletions(-)
 delete mode 100644 eddrit/const.py
 delete mode 100644 eddrit/utils/media.py
 create mode 100644 eddrit/utils/urls.py
 delete mode 100644 tests/utils/test_media.py

diff --git a/eddrit/__init__.py b/eddrit/__init__.py
index e12f457..480b0e4 100644
--- a/eddrit/__init__.py
+++ b/eddrit/__init__.py
@@ -2,7 +2,7 @@
 from loguru import logger
 from eddrit import config
 
-__version__ = "0.5.6"
+__version__ = "0.5.7"
 
 logger.remove()
 logger.add(sys.stderr, level=config.LOG_LEVEL)
diff --git a/eddrit/const.py b/eddrit/const.py
deleted file mode 100644
index 2c3fa64..0000000
--- a/eddrit/const.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# List of image hosting domains.
-# Used to force parsing as media
-# if the post is a link post instead of a media post.
-MEDIA_HOSTING_DOMAINS = ["imgur.com"]
-
-# List of domains that have a special handling for their embed
-# and should not be parsed with the generic embed code
-DOMAINS_WITH_SPECIAL_EMBED_HANDLING = ["twitch.tv"]
-
-STATIC_RES_PATH_REPLACEMENT = "$STATIC_RES_PATH"
diff --git a/eddrit/reddit/content_parser/flair.py b/eddrit/reddit/content_parser/flair.py
index 7a921e2..37d147c 100644
--- a/eddrit/reddit/content_parser/flair.py
+++ b/eddrit/reddit/content_parser/flair.py
@@ -10,7 +10,7 @@ def get_post_flair(api_post_data: Dict[Hashable, Any]) -> Optional[models.Flair]
     text_color = (
         "black" if api_post_data["link_flair_text_color"] == "dark" else "white"
     )
-    bg_color = api_post_data["link_flair_background_color"]
+    bg_color = api_post_data["link_flair_background_color"] or "lightblue"
 
     if api_post_data.get("is_original_content", False):
         flair_components.append(
@@ -55,14 +55,12 @@ def get_user_flair(api_post_data: Dict[Hashable, Any]) -> Optional[models.Flair]
     flair_components = []
 
     # Background color
-    bg_color = api_post_data["author_flair_background_color"]
-    if not bg_color or bg_color == "#ffffff":
-        bg_color = "#dadada"
+    bg_color = api_post_data["author_flair_background_color"] or "lightblue"
 
     # Text color
-    text_color = api_post_data["author_flair_text_color"]
-    if not text_color:
-        text_color = "#0000"
+    text_color = (
+        "black" if api_post_data["author_flair_text_color"] == "dark" else "white"
+    )
 
     if api_post_data.get("author_flair_richtext"):
         for part in api_post_data.get("author_flair_richtext", []):
diff --git a/eddrit/reddit/content_parser/media.py b/eddrit/reddit/content_parser/media.py
index e9b06b7..5e15431 100644
--- a/eddrit/reddit/content_parser/media.py
+++ b/eddrit/reddit/content_parser/media.py
@@ -4,18 +4,16 @@
 
 
 from eddrit import models
-from eddrit.utils.media import (
-    post_is_from_domain,
-)
 from loguru import logger
 
 from eddrit.reddit.content_parser import video_parsers
+from eddrit.utils.urls import get_domain_and_suffix_from_url
 
 
 def _post_is_an_imgur_gif(api_post_data: Dict[Hashable, Any]) -> bool:
     """Check if a post is an imgur gif by checking domain and url file extension."""
     return (
-        post_is_from_domain(api_post_data["domain"], "imgur.com")
+        get_domain_and_suffix_from_url(api_post_data["domain"]) == "imgur.com"
         and ".gif" in api_post_data["url"]
     )
 
@@ -111,19 +109,14 @@ def get_post_video_content(
             video_parsers.get_reddit_video_preview,
         ]
 
-        # Special case for twitch, the embedly embed
-        # Content-Security-Policy prevents including it
-        if post_is_from_domain(api_post_data["domain"], "twitch.tv"):
-            parsers.append(video_parsers.get_twitch_embed)
+        post_domain = get_domain_and_suffix_from_url(api_post_data["domain"])
 
-        # Special case for imgur gif/gifv, it's easier to get the mp4 directly from the URL
-        if _post_is_an_imgur_gif(api_post_data):
-            parsers.append(video_parsers.get_imgur_gif)
-
-        # Special case for gfycat, some old links are not embed
-        # but it can be converted to it.
-        if post_is_from_domain(api_post_data["domain"], "gfycat.com"):
-            parsers.append(video_parsers.get_gfycat_embed)
+        # Special case for some embeds
+        domains_with_special_embed_handling = (
+            video_parsers.get_domains_with_special_embed_handling()
+        )
+        if post_domain in domains_with_special_embed_handling.keys():
+            parsers.append(domains_with_special_embed_handling[post_domain])
 
         parsed_results: list[models.PostVideo | models.EmbedPostContent] = []
         for parser in parsers:
diff --git a/eddrit/reddit/content_parser/video_parsers.py b/eddrit/reddit/content_parser/video_parsers.py
index 5cc896b..fea566f 100644
--- a/eddrit/reddit/content_parser/video_parsers.py
+++ b/eddrit/reddit/content_parser/video_parsers.py
@@ -1,13 +1,20 @@
 import html
-from typing import Any, Dict, Hashable
+import re
+from typing import Any, Callable, Dict, Hashable
 
 import lxml.html
+import tldextract
 
 from eddrit import models
-from eddrit.utils.media import domain_has_special_embed_handling
 from eddrit.utils.middlewares import get_current_host
 
 
+def _domain_has_special_embed_handling(domain: str) -> bool:
+    """Check if the given domain (or URL) has a dedicated embed parsing function."""
+    parts = tldextract.extract(domain)
+    return f"{parts.domain}.{parts.suffix}" in get_domains_with_special_embed_handling()
+
+
 def _cleanup_embed(content: str) -> str:
     """Cleanup embed content for embed posts"""
     content_parsed = lxml.html.fromstring(content)
@@ -20,13 +27,31 @@ def _cleanup_embed(content: str) -> str:
     return lxml.html.tostring(content_parsed).decode("utf-8")
 
 
+def get_domains_with_special_embed_handling() -> dict[str, Callable]:
+    """Return a dict mapping each domain that needs special embed handling
+    to its dedicated parsing function. Posts from these domains should not
+    be parsed with the generic embed code."""
+    return {"twitch.tv": get_twitch_embed, "gfycat.com": get_gfycat_embed}
+
+
 def get_twitch_embed(api_post_data: Dict[Hashable, Any]) -> models.EmbedPostContent:
     """Fetch twitch embed directly as the one in the API has
     a Content-Security-Policy preventing including it.
     """
-    embed_url = api_post_data["url"].replace(
-        "clips.twitch.tv/", "clips.twitch.tv/embed?clip="
-    )
+    url = api_post_data["url"]
+
+    # Clip URLs come in two formats:
+    # - https://clips.twitch.tv/<clip id>
+    # - https://www.twitch.tv/<channel>/clip/<clip id>
+    if "clips.twitch.tv" in url:
+        embed_url = url.replace("clips.twitch.tv/", "clips.twitch.tv/embed?clip=")
+    elif clip_match := re.search("https://www.twitch.tv/.*/clip/(.*)", url):
+        clip_id = clip_match.group(1)
+        embed_url = f"https://clips.twitch.tv/embed?clip={clip_id}"
+    else:
+        # Neither clip format matched: no embed URL can be built
+        raise ValueError("Cannot parse Twitch embed")
+
     parent = get_current_host()
     embed_code = f'<iframe src="{embed_url}&parent={parent}" frameborder="0" allowfullscreen="true" scrolling="no" height="378" width="620"></iframe>'
     return models.EmbedPostContent(
@@ -71,7 +96,7 @@ def get_imgur_gif(api_post_data: Dict[Hashable, Any]) -> models.PostVideo:
 
 
 def get_embed_content(api_post_data: Dict[Hashable, Any]) -> models.EmbedPostContent:
-    if domain_has_special_embed_handling(api_post_data["url"]):
+    if _domain_has_special_embed_handling(api_post_data["url"]):
         raise ValueError("The post domain cannot be parsed with get_embed_content")
 
     embed_data = api_post_data["secure_media"]["oembed"]
diff --git a/eddrit/reddit/parser.py b/eddrit/reddit/parser.py
index 246dc3d..181f5c2 100644
--- a/eddrit/reddit/parser.py
+++ b/eddrit/reddit/parser.py
@@ -5,7 +5,6 @@
 import timeago
 
 from eddrit import models
-from eddrit.const import STATIC_RES_PATH_REPLACEMENT
 from eddrit.reddit.content_parser.flair import get_post_flair, get_user_flair
 from eddrit.reddit.content_parser.media import (
     get_post_gallery_content,
@@ -14,7 +13,16 @@
     post_has_video_content,
 )
 from eddrit.utils.math import pretty_big_num
-from eddrit.utils.media import is_media_hosting_domain
+from eddrit.utils.urls import get_domain_and_suffix_from_url
+
+# Constant used in templates to be replaced by the static path
+STATIC_RES_PATH_REPLACEMENT = "$STATIC_RES_PATH"
+
+# Domains that may appear in link-type posts but are mostly used for image hosting and should be parsed as such
+IMAGE_HOSTING_DOMAINS = ["imgur.com"]
+
+# Media domains to display as plain links (their embeds cannot be displayed, need scripts, etc.)
+MEDIA_DOMAINS_TO_DISPLAY_AS_LINK = ["tiktok.com"]
 
 
 def get_post_content(api_post_data: Dict[Hashable, Any]) -> models.PostContentBase:
@@ -35,13 +43,12 @@ def get_post_content(api_post_data: Dict[Hashable, Any]) -> models.PostContentBa
     # Media posts
     hint = api_post_data.get("post_hint")
     has_video_content = post_has_video_content(api_post_data)
+    post_domain = get_domain_and_suffix_from_url(api_post_data["domain"])
     if (
-        hint == "image"
-        or hint == "hosted:video"
-        or hint == "rich:video"
-        or (hint == "link" and is_media_hosting_domain(api_post_data["domain"]))
+        hint in ["image", "hosted:video", "rich:video"]
+        or (hint == "link" and post_domain in IMAGE_HOSTING_DOMAINS)
         or has_video_content
-    ):
+    ) and post_domain not in MEDIA_DOMAINS_TO_DISPLAY_AS_LINK:
         # Check if image has video (then consider video) else consider image
         if has_video_content:
             return get_post_video_content(api_post_data)
diff --git a/eddrit/utils/media.py b/eddrit/utils/media.py
deleted file mode 100644
index 355138b..0000000
--- a/eddrit/utils/media.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import tldextract
-
-from eddrit import const
-
-
-def is_media_hosting_domain(image_domain: str) -> bool:
-    """
-    Check if the given domain is known to be an media hosting domain like imgur.
-    """
-    _, domain, suffix = tldextract.extract(image_domain)
-    return f"{domain}.{suffix}" in const.MEDIA_HOSTING_DOMAINS
-
-
-def post_is_from_domain(post_domain: str, domain_to_check: str) -> bool:
-    """Check if a post is from a given domain"""
-    _, domain, suffix = tldextract.extract(post_domain)
-    return f"{domain}.{suffix}" == domain_to_check
-
-
-def domain_has_special_embed_handling(domain: str) -> bool:
-    """Check if the given domain is a domain that has a special code for embed handling."""
-    _, domain, suffix = tldextract.extract(domain)
-    return f"{domain}.{suffix}" in const.DOMAINS_WITH_SPECIAL_EMBED_HANDLING
diff --git a/eddrit/utils/urls.py b/eddrit/utils/urls.py
new file mode 100644
index 0000000..5792624
--- /dev/null
+++ b/eddrit/utils/urls.py
@@ -0,0 +1,7 @@
+import tldextract
+
+
+def get_domain_and_suffix_from_url(url: str) -> str:
+    """Return "<domain>.<suffix>" (e.g. "imgur.com") for a URL or bare hostname"""
+    extracted = tldextract.extract(url)
+    return f"{extracted.domain}.{extracted.suffix}"
diff --git a/pyproject.toml b/pyproject.toml
index 081a0ce..cf05d31 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "eddrit"
-version = "0.5.6"
+version = "0.5.7"
 description = "Alternative Reddit frontend"
 authors = ["corenting <corenting@gmail.com>"]
 license = "MIT"
diff --git a/templates/macros/comments.html b/templates/macros/comments.html
index b4e79ed..b4763ab 100644
--- a/templates/macros/comments.html
+++ b/templates/macros/comments.html
@@ -3,21 +3,21 @@
 {% macro render_author(comment, subreddit_name) %}
 {% set author_tag = '' %}
 {% if comment.is_submitter %}
-<span class="post-flair post-comment-title-item" style="background-color: blue;">
+<span class="post-flair post-comment-title-item" style="color: white; background-color: darkblue;">
     <span>
         {{ comment.author.name }}
     </span>
 </span>
 {% set author_tag = '<abbr title="Submitter">[S]</abbr>' %}
 {% elif comment.is_admin %}
-<span class="post-flair post-comment-title-item" style="background-color: red;">
+<span class="post-flair post-comment-title-item" style="color: white; background-color: red;">
     <span>
         {{ comment.author.name }}
     </span>
 </span>
 {% set author_tag = '<abbr title="Reddit admin">[A]</abbr>' %}
 {% elif comment.is_moderator %}
-<span class="post-flair post-comment-title-item" style="background-color: green;">
+<span class="post-flair post-comment-title-item" style="color: white; background-color: green;">
     <span>
         {{ comment.author.name }}
     </span>
diff --git a/templates/macros/post.html b/templates/macros/post.html
index d7e8bd8..79ec4b6 100644
--- a/templates/macros/post.html
+++ b/templates/macros/post.html
@@ -122,7 +122,7 @@
     </div>
     {% endif %}
     {% if post.content.type.value == 'video' %}
-    <div class="post-content-image-or-video needs-js post-content-video"
+    <div class="post-content-image-or-video needs-js"
         style="{{ 'width: fit-content' if post.content.videos[0].width < post.content.videos[0].height else 'height: fit-content;'}} ">
         <video class="video-js" id="video-{{ post.id }}" data-content='{{ post.content | tojson_dataclass }}'/>
     </div>
diff --git a/tests/utils/test_media.py b/tests/utils/test_media.py
deleted file mode 100644
index c5539a3..0000000
--- a/tests/utils/test_media.py
+++ /dev/null
@@ -1,14 +0,0 @@
-import pytest
-
-from eddrit.utils import media
-
-
-@pytest.mark.parametrize(
-    "domain,expected",
-    [
-        ("imgur.com", True),
-        ("github.com", False),
-    ],
-)
-def test_is_image_or_video_host(domain: str, expected: bool) -> None:
-    assert media.is_media_hosting_domain(domain) == expected