Fixes following review

- Simplify Youtube class to get rid of typing issues
- Small fixes
openzim · Oct 19, 2023 · efcd9ab · efcd9ab
1 parent fbd9748
commit efcd9ab
Show file tree

Hide file tree

Showing 10 changed files with 36 additions and 71 deletions.
diff --git a/contrib/reencode_low_quality.py b/contrib/reencode_low_quality.py
@@ -30,9 +30,8 @@ def main(build_path):
         sys.exit(1)
 
     # retrieve source video_format
-    with open(build_dir.joinpath("metadata.json")) as fp:
-        metadata = json.load(fp)
-        video_format = metadata["video_format"]
+    metadata = json.loads(build_dir.joinpath("metadata.json").read_bytes())
+    video_format = metadata["video_format"]
 
     if video_format == "mp4":
         args = VideoMp4Low().to_ffmpeg_args()
@@ -51,8 +50,7 @@ def main(build_path):
 
 
 if __name__ == "__main__":
-    nb_expected_args = 2
-    if len(sys.argv) != nb_expected_args:
+    if len(sys.argv) != 2:  # noqa: PLR2004
         logger.error("you must supply a path to a build folder")
         sys.exit(1)
     main(sys.argv[-1])
diff --git a/contrib/video_encoding_tester.py b/contrib/video_encoding_tester.py
@@ -155,7 +155,8 @@ def download_original(output_dir, youtube_id, video_format):
     fpath = expected_path.parent.joinpath(expected_path.stem)
     audext, vidext = {"webm": ("webm", "webm"), "mp4": ("m4a", "mp4")}[video_format]
     subprocess.run(
-        [  # noqa: S607 # nosec B607
+        [
+            "/usr/bin/env",
             "youtube-dl",
             "-o",
             f"{fpath}.%(ext)s",
@@ -223,16 +224,14 @@ def hduration(value):
         return humanfriendly.format_timespan(value)
 
     def hsduration(value):
-        seconds_per_hour = 3600
-        seconds_per_minute = 60
-        if value >= seconds_per_hour:
-            hours = value // seconds_per_hour
-            value = value % seconds_per_hour
+        if value >= 3600:  # noqa: PLR2004
+            hours = value // 3600
+            value = value % 3600
         else:
             hours = 0
-        if value >= seconds_per_minute:
-            minutes = value // seconds_per_minute
-            value = value % seconds_per_minute
+        if value >= 60:  # noqa: PLR2004
+            minutes = value // 60
+            value = value % 60
         else:
             minutes = 0
         return f"{hours:02}:{minutes:02}:{value:02}"

diff --git a/src/youtube2zim/__main__.py b/src/youtube2zim/__main__.py
@@ -1,18 +1,9 @@
 #!/usr/bin/env python3
 # vim: ai ts=4 sts=4 et sw=4 nu
 
-import pathlib
 import sys
 
-
-def main():
-    # allows running it from source using python youtube2zim
-    sys.path = [str(pathlib.Path(__file__).parent.parent.resolve()), *sys.path]
-
-    from youtube2zim.entrypoint import main as entry
-
-    return entry()
-
+from youtube2zim.entrypoint import main
 
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/src/youtube2zim/constants.py b/src/youtube2zim/constants.py
@@ -2,13 +2,13 @@
 # vim: ai ts=4 sts=4 et sw=4 nu
 
 import logging
-import pathlib
+from pathlib import Path
 
 from zimscraperlib.logging import getLogger
 
 from youtube2zim.__about__ import __version__
 
-ROOT_DIR = pathlib.Path(__file__).parent
+ROOT_DIR = Path(__file__).parent
 NAME = ROOT_DIR.name
 
 SCRAPER = f"{NAME} {__version__}"
@@ -33,14 +33,9 @@
 
 
 class Youtube:
-    def __init__(self):
-        self.build_dir = None
-        self.cache_dir = None
-        self.api_key = None
-
-    def update(self, **kwargs):
-        for key, value in kwargs.items():
-            setattr(self, key, value)
+    build_dir: Path
+    cache_dir: Path
+    api_key: str
 
 
 YOUTUBE = Youtube()
diff --git a/src/youtube2zim/entrypoint.py b/src/youtube2zim/entrypoint.py
@@ -11,7 +11,6 @@
     PLAYLIST,
     SCRAPER,
     USER,
-    YOUTUBE,
     logger,
 )
 from youtube2zim.scraper import Youtube2Zim
@@ -217,7 +216,7 @@ def main():
     try:
         if args.max_concurrency < 1:
             raise ValueError(f"Invalid concurrency value: {args.max_concurrency}")
-        scraper = Youtube2Zim(**dict(args._get_kwargs()), youtube_store=YOUTUBE)
+        scraper = Youtube2Zim(**dict(args._get_kwargs()))
         return scraper.run()
     except Exception as exc:
         logger.error(f"FAILED. An error occurred: {exc}")

diff --git a/src/youtube2zim/playlists/__main__.py b/src/youtube2zim/playlists/__main__.py
@@ -1,18 +1,9 @@
 #!/usr/bin/env python3
 # vim: ai ts=4 sts=4 et sw=4 nu
 
-import pathlib
 import sys
 
-
-def main():
-    # allows running it from source using python youtube2zim
-    sys.path = [str(pathlib.Path(__file__).parent.parent.parent.resolve()), *sys.path]
-
-    from youtube2zim.playlists.entrypoint import main as entry
-
-    return entry()
-
+from youtube2zim.playlists.entrypoint import main
 
 if __name__ == "__main__":
     sys.exit(main())
diff --git a/src/youtube2zim/playlists/scraper.py b/src/youtube2zim/playlists/scraper.py
@@ -53,11 +53,9 @@ def __init__(
         self.metadata = {}  # custom metadata holder
 
         # update youtube credentials store
-        YOUTUBE.update(
-            build_dir=self.build_dir,
-            api_key=self.api_key,
-            cache_dir=self.build_dir.joinpath("cache"),
-        )
+        YOUTUBE.build_dir = self.build_dir
+        YOUTUBE.api_key = self.api_key
+        YOUTUBE.cache_dir = self.build_dir.joinpath("cache")
 
     @property
     def youtube2zim_exe(self):

diff --git a/src/youtube2zim/scraper.py b/src/youtube2zim/scraper.py
@@ -41,6 +41,7 @@
     ROOT_DIR,
     SCRAPER,
     USER,
+    YOUTUBE,
     YOUTUBE_LANG_MAP,
     logger,
 )
@@ -85,7 +86,6 @@ def __init__(
         tmp_dir,
         keep_build_dir,
         max_concurrency,
-        youtube_store,
         language,
         locale_name,
         tags,
@@ -149,9 +149,9 @@ def __init__(
         self.max_concurrency = max_concurrency
 
         # update youtube credentials store
-        youtube_store.update(
-            build_dir=self.build_dir, api_key=self.api_key, cache_dir=self.cache_dir
-        )
+        YOUTUBE.build_dir = self.build_dir
+        YOUTUBE.api_key = self.api_key
+        YOUTUBE.cache_dir = self.cache_dir
 
         # Optimization-cache
         self.s3_url_with_credentials = s3_url_with_credentials
@@ -901,8 +901,7 @@ def to_jinja_subtitle(lang):
             loader=jinja2.FileSystemLoader(str(self.templates_dir)), autoescape=True
         )
 
-        videos = load_mandatory_json(self.cache_dir, "videos")
-        videos = videos.values()
+        videos = load_mandatory_json(self.cache_dir, "videos").values()
         # filter videos so we only include the ones we could retrieve
         videos = list(filter(is_present, videos))
         videos_channels = load_mandatory_json(self.cache_dir, "videos_channels")

diff --git a/src/youtube2zim/utils.py b/src/youtube2zim/utils.py
@@ -2,6 +2,7 @@
 # vim: ai ts=4 sts=4 et sw=4 nu
 
 import json
+from pathlib import Path
 
 import jinja2
 from slugify import slugify
@@ -19,31 +20,26 @@ def clean_text(text):
     return text.strip().replace("\n", " ").replace("\r", " ")
 
 
-def save_json(cache_dir, key, data):
+def save_json(cache_dir: Path, key, data):
     """save JSON collection to path"""
     with open(cache_dir.joinpath(f"{key}.json"), "w") as fp:
         json.dump(data, fp, indent=4)
 
 
-def load_json(cache_dir, key):
+def load_json(cache_dir: Path, key):
     """load JSON collection from path or None"""
     fname = cache_dir.joinpath(f"{key}.json")
     if not fname.exists():
         return None
     try:
-        with open(fname) as fp:
-            return json.load(fp)
+        return json.loads(fname.read_bytes())
     except Exception:
         return None
 
 
-def load_mandatory_json(cache_dir, key):
-    """load mandatory JSON collection from path or raise an error"""
-    fname = cache_dir.joinpath(f"{key}.json")
-    if not fname.exists():
-        raise Exception(f"JSON file at {fname} not found")
-    with open(fname) as fp:
-        return json.load(fp)
+def load_mandatory_json(cache_dir: Path, key):
+    """load mandatory JSON collection from path"""
+    return json.loads(cache_dir.joinpath(f"{key}.json").read_bytes())
 
 
 def has_argument(arg_name, all_args):

diff --git a/src/youtube2zim/youtube.py b/src/youtube2zim/youtube.py
@@ -269,14 +269,13 @@ def save_channel_branding(channels_dir, channel_id, *, save_banner=False):
             thumnbail = thumbnails[quality]["url"]
             break
 
-    if not thumnbail:
-        raise Exception("thumnbail not found")
-
     channel_dir = channels_dir.joinpath(channel_id)
     channel_dir.mkdir(exist_ok=True)
 
     profile_path = channel_dir.joinpath("profile.jpg")
     if not profile_path.exists():
+        if not thumnbail:
+            raise Exception("thumnbail not found")
         stream_file(thumnbail, profile_path)
         # resize profile as we only use up 100px/80 sq
         resize_image(profile_path, width=100, height=100)