From 5f4bc1c2608c7c152ff8dc439f160eb6dfc8a8b5 Mon Sep 17 00:00:00 2001
From: Harry Chen <i@harrychen.xyz>
Date: Sat, 17 Aug 2024 13:33:31 +0800
Subject: [PATCH] Run formatter on some Python scripts

Signed-off-by: Harry Chen <i@harrychen.xyz>
---
 adoptium.py         |   8 +-
 anaconda.py         | 156 ++++++++++++++++++-----------
 apt-sync.py         | 238 +++++++++++++++++++++++++++++---------------
 github-raw.py       |  86 ++++++++++------
 homebrew-bottles.py |  52 ++++++----
 yum-sync.py         | 167 +++++++++++++++++++------------
 6 files changed, 449 insertions(+), 258 deletions(-)

diff --git a/adoptium.py b/adoptium.py
index a51a07d..38dd196 100755
--- a/adoptium.py
+++ b/adoptium.py
@@ -1,17 +1,11 @@
 #!/usr/bin/env python3
 import hashlib
-import traceback
-import json
 import os
-import re
-import shutil
 import subprocess as sp
-import tempfile
-import argparse
 import time
 from email.utils import parsedate_to_datetime
 from pathlib import Path
-from typing import List, Set, Tuple, IO
+from typing import Set
 import requests
 
 DOWNLOAD_TIMEOUT = int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
diff --git a/anaconda.py b/anaconda.py
index fa2ed8d..c20878a 100755
--- a/anaconda.py
+++ b/anaconda.py
@@ -25,6 +25,7 @@
 
 WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR")
 
+# fmt: off
 CONDA_REPOS = ("main", "free", "r", "msys2")
 CONDA_ARCHES = (
     "noarch", "linux-64", "linux-32", "linux-aarch64", "linux-armv6l", "linux-armv7l",
@@ -72,6 +73,7 @@
 EXCLUDED_PACKAGES = (
     "pytorch-nightly", "pytorch-nightly-cpu", "ignite-nightly",
 )
+# fmt: on
 
 # connect and read timeout value
 TIMEOUT_OPTION = (7, 10)
@@ -84,28 +86,31 @@
     format="[%(asctime)s] [%(levelname)s] %(message)s",
 )
 
-def sizeof_fmt(num, suffix='iB'):
-    for unit in ['','K','M','G','T','P','E','Z']:
+
+def sizeof_fmt(num, suffix="iB"):
+    for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]:
         if abs(num) < 1024.0:
             return "%3.2f%s%s" % (num, unit, suffix)
         num /= 1024.0
-    return "%.2f%s%s" % (num, 'Y', suffix)
+    return "%.2f%s%s" % (num, "Y", suffix)
+
 
 def md5_check(file: Path, md5: str = None):
     m = hashlib.md5()
-    with file.open('rb') as f:
+    with file.open("rb") as f:
         while True:
-            buf = f.read(1*1024*1024)
+            buf = f.read(1 * 1024 * 1024)
             if not buf:
                 break
             m.update(buf)
     return m.hexdigest() == md5
 
+
 def sha256_check(file: Path, sha256: str = None):
     m = hashlib.sha256()
-    with file.open('rb') as f:
+    with file.open("rb") as f:
         while True:
-            buf = f.read(1*1024*1024)
+            buf = f.read(1 * 1024 * 1024)
             if not buf:
                 break
             m.update(buf)
@@ -113,34 +118,42 @@ def sha256_check(file: Path, sha256: str = None):
 
 
 def curl_download(remote_url: str, dst_file: Path, sha256: str = None, md5: str = None):
-    sp.check_call([
-        "curl", "-o", str(dst_file),
-        "-sL", "--remote-time", "--show-error",
-        "--fail", "--retry", "10", "--speed-time", "15",
-        "--speed-limit", "5000", remote_url,
-    ])
+    # fmt: off
+    sp.check_call(
+        [
+            "curl", "-o", str(dst_file),
+            "-sL", "--remote-time", "--show-error",
+            "--fail", "--retry", "10",
+            "--speed-time", "15",
+            "--speed-limit", "5000",
+            remote_url,
+        ]
+    )
+    # fmt: on
     if sha256 and (not sha256_check(dst_file, sha256)):
         return "SHA256 mismatch"
     if md5 and (not md5_check(dst_file, md5)):
         return "MD5 mismatch"
 
 
-def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove_legacy: bool):
+def sync_repo(
+    repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove_legacy: bool
+):
     logging.info("Start syncing {}".format(repo_url))
     local_dir.mkdir(parents=True, exist_ok=True)
 
-    repodata_url = repo_url + '/repodata.json'
-    bz2_repodata_url = repo_url + '/repodata.json.bz2'
+    repodata_url = repo_url + "/repodata.json"
+    bz2_repodata_url = repo_url + "/repodata.json.bz2"
     # https://github.com/conda/conda/issues/13256, from conda 24.1.x
-    zst_repodata_url = repo_url + '/repodata.json.zst'
+    zst_repodata_url = repo_url + "/repodata.json.zst"
     # https://docs.conda.io/projects/conda-build/en/latest/release-notes.html
     # "current_repodata.json" - like repodata.json, but only has the newest version of each file
-    current_repodata_url = repo_url + '/current_repodata.json'
+    current_repodata_url = repo_url + "/current_repodata.json"
 
     tmp_repodata = tmpdir / "repodata.json"
     tmp_bz2_repodata = tmpdir / "repodata.json.bz2"
     tmp_zst_repodata = tmpdir / "repodata.json.zst"
-    tmp_current_repodata = tmpdir / 'current_repodata.json'
+    tmp_current_repodata = tmpdir / "current_repodata.json"
 
     curl_download(repodata_url, tmp_repodata)
     curl_download(bz2_repodata_url, tmp_bz2_repodata)
@@ -158,31 +171,33 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
 
     remote_filelist = []
     total_size = 0
-    legacy_packages = repodata['packages']
+    legacy_packages = repodata["packages"]
     conda_packages = repodata.get("packages.conda", {})
     if remove_legacy:
         # https://github.com/anaconda/conda/blob/0dbf85e0546e0b0dc060c8265ec936591ccbe980/conda/core/subdir_data.py#L440-L442
-        use_legacy_packages = set(legacy_packages.keys()) - set(k[:-6] + ".tar.bz2" for k in conda_packages.keys())
+        use_legacy_packages = set(legacy_packages.keys()) - set(
+            k[:-6] + ".tar.bz2" for k in conda_packages.keys()
+        )
         legacy_packages = {k: legacy_packages[k] for k in use_legacy_packages}
     packages = {**legacy_packages, **conda_packages}
 
     for filename, meta in packages.items():
-        if meta['name'] in EXCLUDED_PACKAGES:
+        if meta["name"] in EXCLUDED_PACKAGES:
             continue
 
-        file_size = meta['size']
+        file_size = meta["size"]
         # prefer sha256 over md5
         sha256 = None
         md5 = None
-        if 'sha256' in meta:
-            sha256 = meta['sha256']
-        elif 'md5' in meta:
-            md5 = meta['md5']
+        if "sha256" in meta:
+            sha256 = meta["sha256"]
+        elif "md5" in meta:
+            md5 = meta["md5"]
         total_size += file_size
 
-        pkg_url = '/'.join([repo_url, filename])
+        pkg_url = "/".join([repo_url, filename])
         dst_file = local_dir / filename
-        dst_file_wip = local_dir / ('.downloading.' + filename)
+        dst_file_wip = local_dir / (".downloading." + filename)
         remote_filelist.append(dst_file)
 
         if dst_file.is_file():
@@ -202,7 +217,7 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
                 if err is None:
                     dst_file_wip.rename(dst_file)
             except sp.CalledProcessError:
-                err = 'CalledProcessError'
+                err = "CalledProcessError"
             if err is None:
                 break
             logging.error("Failed to download {}: {}".format(filename, err))
@@ -223,11 +238,15 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
     tmp_current_repodata_gz_gened = False
     if tmp_current_repodata.is_file():
         if os.path.getsize(tmp_current_repodata) > GEN_METADATA_JSON_GZIP_THRESHOLD:
-            sp.check_call(["gzip", "--no-name", "--keep", "--", str(tmp_current_repodata)])
-            shutil.move(str(tmp_current_repodata) + ".gz", str(local_dir / "current_repodata.json.gz"))
+            sp.check_call(
+                ["gzip", "--no-name", "--keep", "--", str(tmp_current_repodata)]
+            )
+            shutil.move(
+                str(tmp_current_repodata) + ".gz",
+                str(local_dir / "current_repodata.json.gz"),
+            )
             tmp_current_repodata_gz_gened = True
-        shutil.move(str(tmp_current_repodata), str(
-            local_dir / "current_repodata.json"))
+        shutil.move(str(tmp_current_repodata), str(local_dir / "current_repodata.json"))
     if not tmp_current_repodata_gz_gened:
         # If the gzip file is not generated, remove the dangling gzip archive
         Path(local_dir / "current_repodata.json.gz").unlink(missing_ok=True)
@@ -235,9 +254,9 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
     if delete:
         local_filelist = []
         delete_count = 0
-        for i in local_dir.glob('*.tar.bz2'):
+        for i in local_dir.glob("*.tar.bz2"):
             local_filelist.append(i)
-        for i in local_dir.glob('*.conda'):
+        for i in local_dir.glob("*.conda"):
             local_filelist.append(i)
         for i in set(local_filelist) - set(remote_filelist):
             logging.info("Deleting {}".format(i))
@@ -245,46 +264,53 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove
             delete_count += 1
         logging.info("{} files deleted".format(delete_count))
 
-    logging.info("{}: {} files, {} in total".format(
-        repodata_url, len(remote_filelist), sizeof_fmt(total_size)))
+    logging.info(
+        "{}: {} files, {} in total".format(
+            repodata_url, len(remote_filelist), sizeof_fmt(total_size)
+        )
+    )
     return total_size
 
+
 def sync_installer(repo_url, local_dir: Path):
     logging.info("Start syncing {}".format(repo_url))
     local_dir.mkdir(parents=True, exist_ok=True)
-    full_scan = random.random() < 0.1 # Do full version check less frequently
+    full_scan = random.random() < 0.1  # Do full version check less frequently
 
     def remote_list():
         r = requests.get(repo_url, timeout=TIMEOUT_OPTION)
         d = pq(r.content)
-        for tr in d('table').find('tr'):
-            tds = pq(tr).find('td')
+        for tr in d("table").find("tr"):
+            tds = pq(tr).find("td")
             if len(tds) != 4:
                 continue
-            fname = tds[0].find('a').text
+            fname = tds[0].find("a").text
             sha256 = tds[3].text
-            if sha256 == '<directory>' or len(sha256) != 64:
+            if sha256 == "<directory>" or len(sha256) != 64:
                 continue
             yield (fname, sha256)
 
     for filename, sha256 in remote_list():
         pkg_url = "/".join([repo_url, filename])
         dst_file = local_dir / filename
-        dst_file_wip = local_dir / ('.downloading.' + filename)
+        dst_file_wip = local_dir / (".downloading." + filename)
 
         if dst_file.is_file():
             r = requests.head(pkg_url, allow_redirects=True, timeout=TIMEOUT_OPTION)
-            len_avail = 'content-length' in r.headers
+            len_avail = "content-length" in r.headers
             if len_avail:
-                remote_filesize = int(r.headers['content-length'])
-            remote_date = parsedate_to_datetime(r.headers['last-modified'])
+                remote_filesize = int(r.headers["content-length"])
+            remote_date = parsedate_to_datetime(r.headers["last-modified"])
             stat = dst_file.stat()
             local_filesize = stat.st_size
             local_mtime = stat.st_mtime
 
             # Do content verification on ~5% of files (see issue #25)
-            if (not len_avail or remote_filesize == local_filesize) and remote_date.timestamp() == local_mtime and \
-                    (random.random() < 0.95 or sha256_check(dst_file, sha256)):
+            if (
+                (not len_avail or remote_filesize == local_filesize)
+                and remote_date.timestamp() == local_mtime
+                and (random.random() < 0.95 or sha256_check(dst_file, sha256))
+            ):
                 logging.info("Skipping {}".format(filename))
 
                 # Stop the scanning if the most recent version is present
@@ -299,25 +325,31 @@ def remote_list():
 
         for retry in range(3):
             logging.info("Downloading {}".format(filename))
-            err = ''
+            err = ""
             try:
                 err = curl_download(pkg_url, dst_file_wip, sha256=sha256)
                 if err is None:
                     dst_file_wip.rename(dst_file)
             except sp.CalledProcessError:
-                err = 'CalledProcessError'
+                err = "CalledProcessError"
             if err is None:
                 break
             logging.error("Failed to download {}: {}".format(filename, err))
 
+
 def main():
     import argparse
+
     parser = argparse.ArgumentParser()
     parser.add_argument("--working-dir", default=WORKING_DIR)
-    parser.add_argument("--delete", action='store_true',
-                        help='delete unreferenced package files')
-    parser.add_argument("--remove-legacy", action='store_true',
-                        help='delete legacy packages which have conda counterpart. Requires client conda >= 4.7.0')
+    parser.add_argument(
+        "--delete", action="store_true", help="delete unreferenced package files"
+    )
+    parser.add_argument(
+        "--remove-legacy",
+        action="store_true",
+        help="delete legacy packages which have conda counterpart. Requires client conda >= 4.7.0",
+    )
     args = parser.parse_args()
 
     if args.working_dir is None:
@@ -336,7 +368,8 @@ def main():
         try:
             sync_installer(remote_url, local_dir)
             size_statistics += sum(
-                f.stat().st_size for f in local_dir.glob('*') if f.is_file())
+                f.stat().st_size for f in local_dir.glob("*") if f.is_file()
+            )
         except Exception:
             logging.exception("Failed to sync installers of {}".format(dist))
             success = False
@@ -348,8 +381,9 @@ def main():
 
             tmpdir = tempfile.mkdtemp()
             try:
-                size_statistics += sync_repo(remote_url,
-                                             local_dir, Path(tmpdir), args.delete, args.remove_legacy)
+                size_statistics += sync_repo(
+                    remote_url, local_dir, Path(tmpdir), args.delete, args.remove_legacy
+                )
             except Exception:
                 logging.exception("Failed to sync repo: {}/{}".format(repo, arch))
                 success = False
@@ -362,8 +396,9 @@ def main():
 
         tmpdir = tempfile.mkdtemp()
         try:
-            size_statistics += sync_repo(remote_url,
-                                         local_dir, Path(tmpdir), args.delete, args.remove_legacy)
+            size_statistics += sync_repo(
+                remote_url, local_dir, Path(tmpdir), args.delete, args.remove_legacy
+            )
         except Exception:
             logging.exception("Failed to sync repo: {}".format(repo))
             success = False
@@ -374,6 +409,7 @@ def main():
     if not success:
         sys.exit(1)
 
+
 if __name__ == "__main__":
     main()
 
diff --git a/apt-sync.py b/apt-sync.py
index 5005798..b8b533e 100755
--- a/apt-sync.py
+++ b/apt-sync.py
@@ -4,7 +4,6 @@
 import os
 import re
 import shutil
-import subprocess as sp
 import argparse
 import bz2
 import gzip
@@ -23,21 +22,27 @@
 
 # set preferred address family
 import requests.packages.urllib3.util.connection as urllib3_cn
-USE_ADDR_FAMILY = os.getenv('USE_ADDR_FAMILY', '').strip().lower()
-if USE_ADDR_FAMILY != '':
-    assert USE_ADDR_FAMILY in ['ipv4', 'ipv6'], "USE_ADDR_FAMILY must be either ipv4 or ipv6"
-    urllib3_cn.allowed_gai_family = lambda: socket.AF_INET if USE_ADDR_FAMILY == 'ipv4' else socket.AF_INET6
+
+USE_ADDR_FAMILY = os.getenv("USE_ADDR_FAMILY", "").strip().lower()
+if USE_ADDR_FAMILY != "":
+    assert USE_ADDR_FAMILY in [
+        "ipv4",
+        "ipv6",
+    ], "USE_ADDR_FAMILY must be either ipv4 or ipv6"
+    urllib3_cn.allowed_gai_family = lambda: (
+        socket.AF_INET if USE_ADDR_FAMILY == "ipv4" else socket.AF_INET6
+    )
 
 OS_TEMPLATE = {
-    'ubuntu-lts': ["focal", "jammy", "noble"],
-    'debian-current': ["bullseye", "bookworm"],
-    'debian-latest2': ["bullseye", "bookworm"],
-    'debian-latest': ["bookworm"],
+    "ubuntu-lts": ["focal", "jammy", "noble"],
+    "debian-current": ["bullseye", "bookworm"],
+    "debian-latest2": ["bullseye", "bookworm"],
+    "debian-latest": ["bookworm"],
 }
-ARCH_NO_PKGIDX = ['dep11', 'i18n', 'cnf']
-MAX_RETRY=int(os.getenv('MAX_RETRY', '3'))
-DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
-REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '')
+ARCH_NO_PKGIDX = ["dep11", "i18n", "cnf"]
+MAX_RETRY = int(os.getenv("MAX_RETRY", "3"))
+DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800"))
+REPO_SIZE_FILE = os.getenv("REPO_SIZE_FILE", "")
 
 pattern_os_template = re.compile(r"@\{(.+)\}")
 pattern_package_name = re.compile(r"^Filename: (.+)$", re.MULTILINE)
@@ -45,11 +50,13 @@
 pattern_package_sha256 = re.compile(r"^SHA256: (\w{64})$", re.MULTILINE)
 download_cache = dict()
 
+
 def check_args(prop: str, lst: List[str]):
     for s in lst:
-        if len(s)==0 or ' ' in s:
+        if len(s) == 0 or " " in s:
             raise ValueError(f"Invalid item in {prop}: {repr(s)}")
 
+
 def replace_os_template(os_list: List[str]) -> List[str]:
     ret = []
     for i in os_list:
@@ -57,103 +64,137 @@ def replace_os_template(os_list: List[str]) -> List[str]:
         if matched:
             for os in OS_TEMPLATE[matched.group(1)]:
                 ret.append(pattern_os_template.sub(os, i))
-        elif i.startswith('@'):
+        elif i.startswith("@"):
             ret.extend(OS_TEMPLATE[i[1:]])
         else:
             ret.append(i)
     return ret
 
-def check_and_download(url: str, dst_file: Path, caching = False)->int:
+
+def check_and_download(url: str, dst_file: Path, caching=False) -> int:
     try:
         if caching:
             if url in download_cache:
                 print(f"Using cached content: {url}", flush=True)
-                with dst_file.open('wb') as f:
+                with dst_file.open("wb") as f:
                     f.write(download_cache[url])
                 return 0
             download_cache[url] = bytes()
         start = time.time()
         with requests.get(url, stream=True, timeout=(5, 10)) as r:
             r.raise_for_status()
-            if 'last-modified' in r.headers:
+            if "last-modified" in r.headers:
                 remote_ts = parsedate_to_datetime(
-                    r.headers['last-modified']).timestamp()
-            else: remote_ts = None
+                    r.headers["last-modified"]
+                ).timestamp()
+            else:
+                remote_ts = None
 
-            with dst_file.open('wb') as f:
+            with dst_file.open("wb") as f:
                 for chunk in r.iter_content(chunk_size=1024**2):
                     if time.time() - start > DOWNLOAD_TIMEOUT:
                         raise TimeoutError("Download timeout")
-                    if not chunk: continue # filter out keep-alive new chunks
+                    if not chunk:
+                        continue  # filter out keep-alive new chunks
 
                     f.write(chunk)
-                    if caching: download_cache[url] += chunk
+                    if caching:
+                        download_cache[url] += chunk
             if remote_ts is not None:
                 os.utime(dst_file, (remote_ts, remote_ts))
         return 0
     except BaseException as e:
         print(e, flush=True)
-        if dst_file.is_file(): dst_file.unlink()
-        if url in download_cache: del download_cache[url]
+        if dst_file.is_file():
+            dst_file.unlink()
+        if url in download_cache:
+            del download_cache[url]
     return 1
 
-def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]:
+
+def mkdir_with_dot_tmp(folder: Path) -> Tuple[Path, Path]:
     tmpdir = folder / ".tmp"
     if tmpdir.is_dir():
         shutil.rmtree(str(tmpdir))
     tmpdir.mkdir(parents=True, exist_ok=True)
     return (folder, tmpdir)
 
+
 def move_files_in(src: Path, dst: Path):
     empty = True
-    for file in src.glob('*'):
+    for file in src.glob("*"):
         empty = False
         print(f"moving {file} to {dst}")
         # shutil.move(str(file), str(dst))
         if file.is_dir():
             (dst / file.name).mkdir(parents=True, exist_ok=True)
             move_files_in(file, dst / file.name)
-            file.rmdir() # rmdir wont fail as all files in it have been moved
+            file.rmdir()  # rmdir wont fail as all files in it have been moved
         else:
-            file.rename(dst / file.name) # Overwrite files
+            file.rename(dst / file.name)  # Overwrite files
     if empty:
         print(f"{src} is empty")
 
-def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Path, deb_set: Dict[str, int])->int:
+
+def apt_mirror(
+    base_url: str,
+    dist: str,
+    repo: str,
+    arch: str,
+    dest_base_dir: Path,
+    deb_set: Dict[str, int],
+) -> int:
     if not dest_base_dir.is_dir():
         print("Destination directory is empty, cannot continue")
         return 1
     print(f"Started mirroring {base_url} {dist}, {repo}, {arch}!", flush=True)
 
-	# download Release files
-    dist_dir,dist_tmp_dir = mkdir_with_dot_tmp(dest_base_dir / "dists" / dist)
-    check_and_download(f"{base_url}/dists/{dist}/InRelease",dist_tmp_dir / "InRelease", caching=True)
-    if check_and_download(f"{base_url}/dists/{dist}/Release",dist_tmp_dir / "Release", caching=True) != 0:
+    # download Release files
+    dist_dir, dist_tmp_dir = mkdir_with_dot_tmp(dest_base_dir / "dists" / dist)
+    check_and_download(
+        f"{base_url}/dists/{dist}/InRelease", dist_tmp_dir / "InRelease", caching=True
+    )
+    if (
+        check_and_download(
+            f"{base_url}/dists/{dist}/Release", dist_tmp_dir / "Release", caching=True
+        )
+        != 0
+    ):
         print("Invalid Repository")
-        if not (dist_dir/"Release").is_file():
-            print(f"{dist_dir/'Release'} never existed, upstream may not provide packages for {dist}, ignore this error")
+        if not (dist_dir / "Release").is_file():
+            print(
+                f"{dist_dir/'Release'} never existed, upstream may not provide packages for {dist}, ignore this error"
+            )
             return 0
         return 1
-    check_and_download(f"{base_url}/dists/{dist}/Release.gpg",dist_tmp_dir / "Release.gpg", caching=True)
+    check_and_download(
+        f"{base_url}/dists/{dist}/Release.gpg",
+        dist_tmp_dir / "Release.gpg",
+        caching=True,
+    )
 
-    comp_dir,comp_tmp_dir = mkdir_with_dot_tmp(dist_dir / repo)
+    comp_dir, comp_tmp_dir = mkdir_with_dot_tmp(dist_dir / repo)
 
-	# load Package Index URLs from the Release file
+    # load Package Index URLs from the Release file
     release_file = dist_tmp_dir / "Release"
     arch_dir = arch if arch in ARCH_NO_PKGIDX else f"binary-{arch}"
-    pkgidx_dir,pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir)
+    pkgidx_dir, pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir)
     with open(release_file, "r") as fd:
-        pkgidx_content=None
-        cnt_start=False
+        pkgidx_content = None
+        cnt_start = False
         for line in fd:
             if cnt_start:
                 fields = line.split()
-                if len(fields) != 3 or len(fields[0]) != 64: # 64 is SHA-256 checksum length
+                if (
+                    len(fields) != 3 or len(fields[0]) != 64
+                ):  # 64 is SHA-256 checksum length
                     break
                 checksum, filesize, filename = tuple(fields)
-                if filename.startswith(f"{repo}/{arch_dir}/") or \
-                   filename.startswith(f"{repo}/Contents-{arch}") or \
-                   filename.startswith(f"Contents-{arch}"):
+                if (
+                    filename.startswith(f"{repo}/{arch_dir}/")
+                    or filename.startswith(f"{repo}/Contents-{arch}")
+                    or filename.startswith(f"Contents-{arch}")
+                ):
                     fn = Path(filename)
                     if len(fn.parts) <= 3:
                         # Contents-amd64.gz
@@ -163,7 +204,13 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
                     else:
                         # main/dep11/by-hash/MD5Sum/0af5c69679a24671cfd7579095a9cb5e
                         # deep_tmp_dir is in pkgidx_tmp_dir hence no extra garbage collection needed
-                        deep_tmp_dir = dist_dir / Path(fn.parts[0]) / Path(fn.parts[1]) / ".tmp" / Path('/'.join(fn.parts[2:-1]))
+                        deep_tmp_dir = (
+                            dist_dir
+                            / Path(fn.parts[0])
+                            / Path(fn.parts[1])
+                            / ".tmp"
+                            / Path("/".join(fn.parts[2:-1]))
+                        )
                         deep_tmp_dir.mkdir(parents=True, exist_ok=True)
                         pkgidx_file = deep_tmp_dir / fn.name
                 else:
@@ -174,33 +221,41 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa
                     print("Failed to download:", pkglist_url)
                     continue
 
-                with pkgidx_file.open('rb') as t: content = t.read()
+                with pkgidx_file.open("rb") as t:
+                    content = t.read()
                 if len(content) != int(filesize):
-                    print(f"Invalid size of {pkgidx_file}, expected {filesize}, skipped")
+                    print(
+                        f"Invalid size of {pkgidx_file}, expected {filesize}, skipped"
+                    )
                     pkgidx_file.unlink()
                     continue
                 if hashlib.sha256(content).hexdigest() != checksum:
-                    print(f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped")
+                    print(
+                        f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped"
+                    )
                     pkgidx_file.unlink()
                     continue
-                if pkgidx_content is None and pkgidx_file.stem == 'Packages':
-                    print(f"getting packages index content from {pkgidx_file.name}", flush=True)
+                if pkgidx_content is None and pkgidx_file.stem == "Packages":
+                    print(
+                        f"getting packages index content from {pkgidx_file.name}",
+                        flush=True,
+                    )
                     suffix = pkgidx_file.suffix
-                    if suffix == '.xz':
-                        pkgidx_content = lzma.decompress(content).decode('utf-8')
-                    elif suffix == '.bz2':
-                        pkgidx_content = bz2.decompress(content).decode('utf-8')
-                    elif suffix == '.gz':
-                        pkgidx_content = gzip.decompress(content).decode('utf-8')
-                    elif suffix == '':
-                        pkgidx_content = content.decode('utf-8')
+                    if suffix == ".xz":
+                        pkgidx_content = lzma.decompress(content).decode("utf-8")
+                    elif suffix == ".bz2":
+                        pkgidx_content = bz2.decompress(content).decode("utf-8")
+                    elif suffix == ".gz":
+                        pkgidx_content = gzip.decompress(content).decode("utf-8")
+                    elif suffix == "":
+                        pkgidx_content = content.decode("utf-8")
                     else:
                         print("unsupported format")
 
             # Currently only support SHA-256 checksum, because
             # "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field."
             # from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files
-            if line.startswith('SHA256:'):
+            if line.startswith("SHA256:"):
                 cnt_start = True
     if not cnt_start:
         print("Cannot find SHA-256 checksum")
@@ -219,6 +274,7 @@ def collect_tmp_dir():
         except:
             traceback.print_exc()
             return 1
+
     if arch in ARCH_NO_PKGIDX:
         if collect_tmp_dir() == 1:
             return 1
@@ -227,8 +283,10 @@ def collect_tmp_dir():
 
     if pkgidx_content is None:
         print("index is empty, failed")
-        if len(list(pkgidx_dir.glob('Packages*'))) == 0:
-            print(f"{pkgidx_dir/'Packages'} never existed, upstream may not provide {dist}/{repo}/{arch}, ignore this error")
+        if len(list(pkgidx_dir.glob("Packages*"))) == 0:
+            print(
+                f"{pkgidx_dir/'Packages'} never existed, upstream may not provide {dist}/{repo}/{arch}, ignore this error"
+            )
             return 0
         return 1
 
@@ -236,8 +294,8 @@ def collect_tmp_dir():
     err = 0
     deb_count = 0
     deb_size = 0
-    for pkg in pkgidx_content.split('\n\n'):
-        if len(pkg) < 10: # ignore blanks
+    for pkg in pkgidx_content.split("\n\n"):
+        if len(pkg) < 10:  # ignore blanks
             continue
         try:
             pkg_filename = pattern_package_name.search(pkg).group(1)
@@ -255,14 +313,14 @@ def collect_tmp_dir():
         dest_dir = dest_filename.parent
         if not dest_dir.is_dir():
             dest_dir.mkdir(parents=True, exist_ok=True)
-        if dest_filename.suffix == '.deb':
+        if dest_filename.suffix == ".deb":
             deb_set[str(dest_filename.relative_to(dest_base_dir))] = pkg_size
         if dest_filename.is_file() and dest_filename.stat().st_size == pkg_size:
             print(f"Skipping {pkg_filename}, size {pkg_size}")
             continue
 
-        pkg_url=f"{base_url}/{pkg_filename}"
-        dest_tmp_filename = dest_filename.with_name('._syncing_.' + dest_filename.name)
+        pkg_url = f"{base_url}/{pkg_filename}"
+        dest_tmp_filename = dest_filename.with_name("._syncing_." + dest_filename.name)
         for retry in range(MAX_RETRY):
             print(f"downloading {pkg_url} to {dest_filename}", flush=True)
             # break # dry run
@@ -289,19 +347,25 @@ def collect_tmp_dir():
     print(f"{deb_count} packages, {deb_size} bytes in total", flush=True)
     return err
 
+
 def apt_delete_old_debs(dest_base_dir: Path, remote_set: Dict[str, int], dry_run: bool):
-    on_disk = set([
-        str(i.relative_to(dest_base_dir)) for i in dest_base_dir.glob('**/*.deb')])
+    on_disk = set(
+        [str(i.relative_to(dest_base_dir)) for i in dest_base_dir.glob("**/*.deb")]
+    )
     deleting = on_disk - remote_set.keys()
     # print(on_disk)
     # print(remote_set)
-    print(f"Deleting {len(deleting)} packages not in the index{' (dry run)' if dry_run else ''}", flush=True)
+    print(
+        f"Deleting {len(deleting)} packages not in the index{' (dry run)' if dry_run else ''}",
+        flush=True,
+    )
     for i in deleting:
         if dry_run:
             print("Will delete", i)
         else:
             print("Deleting", i)
-            (dest_base_dir/i).unlink()
+            (dest_base_dir / i).unlink()
+
 
 def main():
 
@@ -311,31 +375,35 @@ def main():
     parser.add_argument("component", type=str, help="e.g. multiverse,contrib")
     parser.add_argument("arch", type=str, help="e.g. i386,amd64")
     parser.add_argument("working_dir", type=Path, help="working directory")
-    parser.add_argument("--delete", action='store_true',
-                        help='delete unreferenced package files')
-    parser.add_argument("--delete-dry-run", action='store_true',
-                        help='print package files to be deleted only')
+    parser.add_argument(
+        "--delete", action="store_true", help="delete unreferenced package files"
+    )
+    parser.add_argument(
+        "--delete-dry-run",
+        action="store_true",
+        help="print package files to be deleted only",
+    )
     args = parser.parse_args()
 
     # generate lists of os codenames
-    os_list = args.os_version.split(',')
+    os_list = args.os_version.split(",")
     check_args("os_version", os_list)
     os_list = replace_os_template(os_list)
 
     # generate a list of components and archs for each os codename
     def generate_list_for_oses(raw: str, name: str) -> List[List[str]]:
         n_os = len(os_list)
-        if ':' in raw:
+        if ":" in raw:
             # specify os codenames for each component
             lists = []
-            for l in raw.split(':'):
-                list_for_os = l.split(',')
+            for l in raw.split(":"):
+                list_for_os = l.split(",")
                 check_args(name, list_for_os)
                 lists.append(list_for_os)
             assert len(lists) == n_os, f"{name} must be specified for each component"
         else:
             # use same os codenames for all components
-            l = raw.split(',')
+            l = raw.split(",")
             check_args(name, l)
             lists = [l] * n_os
         return lists
@@ -350,7 +418,12 @@ def generate_list_for_oses(raw: str, name: str) -> List[List[str]]:
     for os, arch_list, comp_list in zip(os_list, arch_lists, component_lists):
         for comp in comp_list:
             for arch in arch_list:
-                if apt_mirror(args.base_url, os, comp, arch, args.working_dir, deb_set=deb_set) != 0:
+                if (
+                    apt_mirror(
+                        args.base_url, os, comp, arch, args.working_dir, deb_set=deb_set
+                    )
+                    != 0
+                ):
                     failed.append((os, comp, arch))
     if len(failed) > 0:
         print(f"Failed APT repos of {args.base_url}: ", failed)
@@ -363,5 +436,6 @@ def generate_list_for_oses(raw: str, name: str) -> List[List[str]]:
             total_size = sum(deb_set.values())
             fd.write(f"+{total_size}")
 
+
 if __name__ == "__main__":
     main()
diff --git a/github-raw.py b/github-raw.py
index 17f7e88..4c31fb9 100755
--- a/github-raw.py
+++ b/github-raw.py
@@ -1,36 +1,46 @@
 #!/usr/bin/env python3
 import os
-import sys
 import threading
-import traceback
 import queue
 from pathlib import Path
-from datetime import datetime
 import tempfile
-import hashlib
 
 import requests
 
 BASE_URL = os.getenv("TUNASYNC_UPSTREAM_URL", "https://api.github.com/repos/")
 WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR")
-MIRROR_BASE_URL = os.getenv("MIRROR_BASE_URL", 'https://mirrors.tuna.tsinghua.edu.cn/github-raw/')
+MIRROR_BASE_URL = os.getenv(
+    "MIRROR_BASE_URL", "https://mirrors.tuna.tsinghua.edu.cn/github-raw/"
+)
+
 
 def raw_to_mirror(s: str) -> str:
-    return s.replace("https://raw.githubusercontent.com/",
-            MIRROR_BASE_URL)
+    return s.replace("https://raw.githubusercontent.com/", MIRROR_BASE_URL)
+
 
 def delete_line_with(w: str, s: str) -> str:
     return "\n".join(list(filter(lambda x: x.count(w) == 0, s.splitlines())))
 
+
 def delete_line_with_gbpdistro(s: str) -> str:
     return delete_line_with("gbpdistro", s)
 
+
 REPOS = [
     # owner/repo, tree, tree, tree, blob
     ## for stackage
     ["fpco/stackage-content", "master", "stack", "global-hints.yaml"],
     ## for rosdep
-    { "path": ["ros/rosdistro", "master", "rosdep", "sources.list.d", "20-default.list"], "filter": [ raw_to_mirror, delete_line_with_gbpdistro ] },
+    {
+        "path": [
+            "ros/rosdistro",
+            "master",
+            "rosdep",
+            "sources.list.d",
+            "20-default.list",
+        ],
+        "filter": [raw_to_mirror, delete_line_with_gbpdistro],
+    },
     ["ros/rosdistro", "master", "rosdep", "osx-homebrew.yaml"],
     ["ros/rosdistro", "master", "rosdep", "base.yaml"],
     ["ros/rosdistro", "master", "rosdep", "python.yaml"],
@@ -44,36 +54,46 @@ def delete_line_with_gbpdistro(s: str) -> str:
 TIMEOUT_OPTION = (7, 10)
 total_size = 0
 
+
 # wrap around requests.get to use token if available
 def github_get(*args, **kwargs):
-    headers = kwargs['headers'] if 'headers' in kwargs else {}
-    if 'GITHUB_TOKEN' in os.environ:
-        headers['Authorization'] = 'token {}'.format(
-            os.environ['GITHUB_TOKEN'])
-    kwargs['headers'] = headers
+    headers = kwargs["headers"] if "headers" in kwargs else {}
+    if "GITHUB_TOKEN" in os.environ:
+        headers["Authorization"] = "token {}".format(os.environ["GITHUB_TOKEN"])
+    kwargs["headers"] = headers
     return requests.get(*args, **kwargs)
 
+
 def github_tree(*args, **kwargs):
-    headers = kwargs['headers'] if 'headers' in kwargs else {}
+    headers = kwargs["headers"] if "headers" in kwargs else {}
     headers["Accept"] = "application/vnd.github.v3+json"
-    kwargs['headers'] = headers
+    kwargs["headers"] = headers
     return github_get(*args, **kwargs)
 
+
 # NOTE blob API supports file up to 100MB
 # To get larger one, we need raw.githubcontent, which is not implemented now
 def github_blob(*args, **kwargs):
-    headers = kwargs['headers'] if 'headers' in kwargs else {}
+    headers = kwargs["headers"] if "headers" in kwargs else {}
     headers["Accept"] = "application/vnd.github.v3.raw"
-    kwargs['headers'] = headers
+    kwargs["headers"] = headers
     return github_get(*args, **kwargs)
 
-def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, filter=None):
+
+def do_download(
+    remote_url: str, dst_file: Path, remote_size: int, sha: str, filter=None
+):
     # NOTE the stream=True parameter below
     with github_blob(remote_url, stream=True) as r:
         r.raise_for_status()
         tmp_dst_file = None
         try:
-            with tempfile.NamedTemporaryFile(prefix="." + dst_file.name + ".", suffix=".tmp", dir=dst_file.parent, delete=False) as f:
+            with tempfile.NamedTemporaryFile(
+                prefix="." + dst_file.name + ".",
+                suffix=".tmp",
+                dir=dst_file.parent,
+                delete=False,
+            ) as f:
                 tmp_dst_file = Path(f.name)
                 for chunk in r.iter_content(chunk_size=1024**2):
                     if chunk:  # filter out keep-alive new chunks
@@ -82,7 +102,9 @@ def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, fil
             # check for downloaded size
             downloaded_size = tmp_dst_file.stat().st_size
             if remote_size != -1 and downloaded_size != remote_size:
-                raise Exception(f'File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes')
+                raise Exception(
+                    f"File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes"
+                )
             if filter != None:
                 with open(tmp_dst_file, "r+") as f:
                     s = f.read()
@@ -108,25 +130,26 @@ def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, fil
                 if tmp_dst_file.is_file():
                     tmp_dst_file.unlink()
 
+
 def downloading_worker(q):
     while True:
         item = q.get()
         if item is None:
             break
 
-        filter = item.pop(0) # remove filter
+        filter = item.pop(0)  # remove filter
 
-        dst_file = Path('/'.join(item))
+        dst_file = Path("/".join(item))
         dst_file.parent.mkdir(parents=True, exist_ok=True)
 
-        item.pop(0) # remove working dir
+        item.pop(0)  # remove working dir
         owner_repo = item.pop(0)
         try:
             tree = item.pop(0)
             tree_child = item.pop(0)
             child_is_leaf = False
-            url = ''
-            sha = ''
+            url = ""
+            sha = ""
             size = 0
             while not child_is_leaf:
                 with github_tree(f"{BASE_URL}{owner_repo}/git/trees/{tree}") as r:
@@ -147,8 +170,7 @@ def downloading_worker(q):
                             break
                     else:
                         raise Exception
-            if not dst_file.is_symlink() or \
-                Path(os.readlink(dst_file)).name != sha:
+            if not dst_file.is_symlink() or Path(os.readlink(dst_file)).name != sha:
                 do_download(url, dst_file, size, sha, filter)
             else:
                 print("Skip", dst_file)
@@ -164,16 +186,19 @@ def downloading_worker(q):
 def create_workers(n):
     task_queue = queue.Queue()
     for i in range(n):
-        t = threading.Thread(target=downloading_worker, args=(task_queue, ))
+        t = threading.Thread(target=downloading_worker, args=(task_queue,))
         t.start()
     return task_queue
 
+
 def main():
     import argparse
+
     parser = argparse.ArgumentParser()
     parser.add_argument("--working-dir", default=WORKING_DIR)
-    parser.add_argument("--workers", default=1, type=int,
-                        help='number of concurrent downloading jobs')
+    parser.add_argument(
+        "--workers", default=1, type=int, help="number of concurrent downloading jobs"
+    )
     args = parser.parse_args()
 
     if args.working_dir is None:
@@ -198,6 +223,7 @@ def main():
     for i in range(args.workers):
         task_queue.put(None)
 
+
 if __name__ == "__main__":
     main()
 
diff --git a/homebrew-bottles.py b/homebrew-bottles.py
index 8007704..0fc46a7 100755
--- a/homebrew-bottles.py
+++ b/homebrew-bottles.py
@@ -10,25 +10,30 @@
 
 # mainly from apt-sync.py
 
-FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API = os.getenv("TUNASYNC_UPSTREAM_URL", "https://api.github.com/repos/Homebrew/formulae.brew.sh/actions/artifacts?name=github-pages")
+FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API = os.getenv(
+    "TUNASYNC_UPSTREAM_URL",
+    "https://api.github.com/repos/Homebrew/formulae.brew.sh/actions/artifacts?name=github-pages",
+)
 WORKING_DIR = Path(os.getenv("TUNASYNC_WORKING_DIR", "/data"))
-DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
+DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800"))
 
 github_api_headers = {
     "Accept": "application/vnd.github+json",
     "X-GitHub-Api-Version": "2022-11-28",
 }
 
-if 'GITHUB_TOKEN' in os.environ:
-    github_api_headers['Authorization'] = 'token {}'.format(
-        os.environ['GITHUB_TOKEN'])
+if "GITHUB_TOKEN" in os.environ:
+    github_api_headers["Authorization"] = "token {}".format(os.environ["GITHUB_TOKEN"])
 else:
     # https://github.com/actions/upload-artifact/issues/51
     # the token should have 'public_repo' access
     raise Exception("GITHUB_TOKEN is required")
 
+
 def formulae_github_pages(zip_file: Path, unzip_directory: Path, tar_directory: Path):
-    artifacts = requests.get(FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API, headers=github_api_headers)
+    artifacts = requests.get(
+        FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API, headers=github_api_headers
+    )
     artifacts.raise_for_status()
     artifacts = artifacts.json()
     latest = None
@@ -40,7 +45,10 @@ def formulae_github_pages(zip_file: Path, unzip_directory: Path, tar_directory:
 
     check_and_download(zip_url, zip_file, zip_file, github_api_headers)
     sp.run(["unzip", str(zip_file), "-d", str(unzip_directory)])
-    sp.run(["tar", "-C", str(tar_directory), "-xf", str(unzip_directory / "artifact.tar")])
+    sp.run(
+        ["tar", "-C", str(tar_directory), "-xf", str(unzip_directory / "artifact.tar")]
+    )
+
 
 def bottles(formula_file: Path):
     b = {}
@@ -49,7 +57,7 @@ def bottles(formula_file: Path):
     for formula in formulae:
         if formula["versions"]["bottle"] and "stable" in formula["bottle"]:
             bs = formula["bottle"]["stable"]
-            for (platform, v) in bs["files"].items():
+            for platform, v in bs["files"].items():
                 sha256 = v["sha256"]
                 url = v["url"]
                 name = formula["name"]
@@ -63,28 +71,36 @@ def bottles(formula_file: Path):
                 }
     return b
 
+
 ghcr_headers = {
     "Accept": "application/vnd.oci.image.index.v1+json",
-    "Authorization": "Bearer QQ=="
+    "Authorization": "Bearer QQ==",
 }
 
+
 # borrowed from apt-sync.py
-def check_and_download(url: str, dst_file: Path, dst_tmp_file: Path, headers=ghcr_headers):
-    if dst_file.is_file(): return 2 # old file
+def check_and_download(
+    url: str, dst_file: Path, dst_tmp_file: Path, headers=ghcr_headers
+):
+    if dst_file.is_file():
+        return 2  # old file
     try:
         start = time.time()
         with requests.get(url, stream=True, timeout=(5, 10), headers=headers) as r:
             r.raise_for_status()
-            if 'last-modified' in r.headers:
+            if "last-modified" in r.headers:
                 remote_ts = parsedate_to_datetime(
-                    r.headers['last-modified']).timestamp()
-            else: remote_ts = None
+                    r.headers["last-modified"]
+                ).timestamp()
+            else:
+                remote_ts = None
 
-            with dst_tmp_file.open('wb') as f:
+            with dst_tmp_file.open("wb") as f:
                 for chunk in r.iter_content(chunk_size=1024**2):
                     if time.time() - start > DOWNLOAD_TIMEOUT:
                         raise TimeoutError("Download timeout")
-                    if not chunk: continue # filter out keep-alive new chunks
+                    if not chunk:
+                        continue  # filter out keep-alive new chunks
 
                     f.write(chunk)
             if remote_ts is not None:
@@ -92,9 +108,11 @@ def check_and_download(url: str, dst_file: Path, dst_tmp_file: Path, headers=ghc
         return 0
     except BaseException as e:
         print(e, flush=True)
-        if dst_tmp_file.is_file(): dst_tmp_file.unlink()
+        if dst_tmp_file.is_file():
+            dst_tmp_file.unlink()
     return 1
 
+
 if __name__ == "__main__":
     # clean tmp file from previous sync
     TMP_DIR = WORKING_DIR / ".tmp"
diff --git a/yum-sync.py b/yum-sync.py
index 085a48e..cf15959 100755
--- a/yum-sync.py
+++ b/yum-sync.py
@@ -1,7 +1,6 @@
 #!/usr/bin/env python3
 import traceback
 import os
-import sys
 import subprocess as sp
 import tempfile
 import argparse
@@ -16,47 +15,50 @@
 from typing import List, Dict
 import requests
 
-REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '')
-DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800'))
+REPO_SIZE_FILE = os.getenv("REPO_SIZE_FILE", "")
+DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800"))
 REPO_STAT = {}
 
+
 def calc_repo_size(path: Path):
-    dbfiles = path.glob('repodata/*primary.*')
+    dbfiles = path.glob("repodata/*primary.*")
     with tempfile.NamedTemporaryFile() as tmp:
         dec = None
         dbfile = None
         for db in dbfiles:
             dbfile = db
             suffixes = db.suffixes
-            if suffixes[-1] == '.bz2':
+            if suffixes[-1] == ".bz2":
                 dec = bz2.decompress
                 suffixes = suffixes[:-1]
-            elif suffixes[-1] == '.gz':
+            elif suffixes[-1] == ".gz":
                 dec = gzip.decompress
                 suffixes = suffixes[:-1]
-            elif suffixes[-1] in ('.sqlite', '.xml'):
+            elif suffixes[-1] in (".sqlite", ".xml"):
                 dec = lambda x: x
         if dec is None:
             print(f"Failed to read from {path}: {list(dbfiles)}", flush=True)
             return
-        with db.open('rb') as f:
+        with db.open("rb") as f:
             tmp.write(dec(f.read()))
             tmp.flush()
 
-        if suffixes[-1] == '.sqlite':
+        if suffixes[-1] == ".sqlite":
             conn = sqlite3.connect(tmp.name)
             c = conn.cursor()
             c.execute("select sum(size_package),count(1) from packages")
             size, cnt = c.fetchone()
             conn.close()
-        elif suffixes[-1] == '.xml':
+        elif suffixes[-1] == ".xml":
             try:
                 tree = ET.parse(tmp.name)
                 root = tree.getroot()
-                assert root.tag.endswith('metadata')
+                assert root.tag.endswith("metadata")
                 cnt, size = 0, 0
-                for location in root.findall('./{http://linux.duke.edu/metadata/common}package/{http://linux.duke.edu/metadata/common}size'):
-                    size += int(location.attrib['package'])
+                for location in root.findall(
+                    "./{http://linux.duke.edu/metadata/common}package/{http://linux.duke.edu/metadata/common}size"
+                ):
+                    size += int(location.attrib["package"])
                     cnt += 1
             except:
                 traceback.print_exc()
@@ -69,23 +71,27 @@ def calc_repo_size(path: Path):
         print(f"  {cnt} packages, {size} bytes in total", flush=True)
 
         global REPO_STAT
-        REPO_STAT[str(path)] = (size, cnt) if cnt > 0 else (0, 0) # size can be None
+        REPO_STAT[str(path)] = (size, cnt) if cnt > 0 else (0, 0)  # size can be None
+
 
-def check_and_download(url: str, dst_file: Path)->int:
+def check_and_download(url: str, dst_file: Path) -> int:
     try:
         start = time.time()
         with requests.get(url, stream=True, timeout=(5, 10)) as r:
             r.raise_for_status()
-            if 'last-modified' in r.headers:
+            if "last-modified" in r.headers:
                 remote_ts = parsedate_to_datetime(
-                    r.headers['last-modified']).timestamp()
-            else: remote_ts = None
+                    r.headers["last-modified"]
+                ).timestamp()
+            else:
+                remote_ts = None
 
-            with dst_file.open('wb') as f:
+            with dst_file.open("wb") as f:
                 for chunk in r.iter_content(chunk_size=1024**2):
                     if time.time() - start > DOWNLOAD_TIMEOUT:
                         raise TimeoutError("Download timeout")
-                    if not chunk: continue # filter out keep-alive new chunks
+                    if not chunk:
+                        continue  # filter out keep-alive new chunks
 
                     f.write(chunk)
             if remote_ts is not None:
@@ -93,13 +99,15 @@ def check_and_download(url: str, dst_file: Path)->int:
         return 0
     except BaseException as e:
         print(e, flush=True)
-        if dst_file.is_file(): dst_file.unlink()
+        if dst_file.is_file():
+            dst_file.unlink()
     return 1
 
+
 def download_repodata(url: str, path: Path) -> int:
     path = path / "repodata"
     path.mkdir(exist_ok=True)
-    oldfiles = set(path.glob('*.*'))
+    oldfiles = set(path.glob("*.*"))
     newfiles = set()
     if check_and_download(url + "/repodata/repomd.xml", path / ".repomd.xml") != 0:
         print(f"Failed to download the repomd.xml of {url}")
@@ -107,64 +115,78 @@ def download_repodata(url: str, path: Path) -> int:
     try:
         tree = ET.parse(path / ".repomd.xml")
         root = tree.getroot()
-        assert root.tag.endswith('repomd')
-        for location in root.findall('./{http://linux.duke.edu/metadata/repo}data/{http://linux.duke.edu/metadata/repo}location'):
-                href = location.attrib['href']
-                assert len(href) > 9 and href[:9] == 'repodata/'
-                fn = path / href[9:]
-                newfiles.add(fn)
-                if check_and_download(url + '/' + href, fn) != 0:
-                    print(f"Failed to download the {href}")
-                    return 1
+        assert root.tag.endswith("repomd")
+        for location in root.findall(
+            "./{http://linux.duke.edu/metadata/repo}data/{http://linux.duke.edu/metadata/repo}location"
+        ):
+            href = location.attrib["href"]
+            assert len(href) > 9 and href[:9] == "repodata/"
+            fn = path / href[9:]
+            newfiles.add(fn)
+            if check_and_download(url + "/" + href, fn) != 0:
+                print(f"Failed to download the {href}")
+                return 1
     except BaseException as e:
         traceback.print_exc()
         return 1
 
-    (path / ".repomd.xml").rename(path / "repomd.xml") # update the repomd.xml
+    (path / ".repomd.xml").rename(path / "repomd.xml")  # update the repomd.xml
     newfiles.add(path / "repomd.xml")
-    for i in (oldfiles - newfiles):
+    for i in oldfiles - newfiles:
         print(f"Deleting old files: {i}")
         i.unlink()
 
+
 def check_args(prop: str, lst: List[str]):
     for s in lst:
-        if len(s)==0 or ' ' in s:
+        if len(s) == 0 or " " in s:
             raise ValueError(f"Invalid item in {prop}: {repr(s)}")
 
+
 def substitute_vars(s: str, vardict: Dict[str, str]) -> str:
     for key, val in vardict.items():
-        tpl = "@{"+key+"}"
+        tpl = "@{" + key + "}"
         s = s.replace(tpl, val)
     return s
 
+
 def main():
 
     parser = argparse.ArgumentParser()
     parser.add_argument("base_url", type=str, help="base URL")
     parser.add_argument("os_version", type=str, help="e.g. 7-8,9")
-    parser.add_argument("component", type=str, help="e.g. mysql56-community,mysql57-community")
+    parser.add_argument(
+        "component", type=str, help="e.g. mysql56-community,mysql57-community"
+    )
     parser.add_argument("arch", type=str, help="e.g. x86_64,aarch64")
     parser.add_argument("repo_name", type=str, help="e.g. @{comp}-el@{os_ver}")
     parser.add_argument("working_dir", type=Path, help="working directory")
-    parser.add_argument("--download-repodata", action='store_true',
-                        help='download repodata files instead of generating them')
-    parser.add_argument("--pass-arch-to-reposync", action='store_true',
-                        help='''pass --arch to reposync to further filter packages by 'arch' field in metadata (NOT recommended, prone to missing packages in some repositories, e.g. mysql)''')
+    parser.add_argument(
+        "--download-repodata",
+        action="store_true",
+        help="download repodata files instead of generating them",
+    )
+    parser.add_argument(
+        "--pass-arch-to-reposync",
+        action="store_true",
+        help="""pass --arch to reposync to further filter packages by 'arch' field in metadata (NOT recommended, prone to missing packages in some repositories, e.g. mysql)""",
+    )
     args = parser.parse_args()
 
     os_list = []
-    for os_version in args.os_version.split(','):
-        if '-' in os_version and '-stream' not in os_version:
-            dash = os_version.index('-')
-            os_list = os_list + [ str(i) for i in range(
-                int(os_version[:dash]),
-                1+int(os_version[dash+1:])) ]
+    for os_version in args.os_version.split(","):
+        if "-" in os_version and "-stream" not in os_version:
+            dash = os_version.index("-")
+            os_list = os_list + [
+                str(i)
+                for i in range(int(os_version[:dash]), 1 + int(os_version[dash + 1 :]))
+            ]
         else:
             os_list.append(os_version)
     check_args("os_version", os_list)
-    component_list = args.component.split(',')
+    component_list = args.component.split(",")
     check_args("component", component_list)
-    arch_list = args.arch.split(',')
+    arch_list = args.arch.split(",")
     check_args("arch", arch_list)
 
     failed = []
@@ -175,16 +197,18 @@ def combination_os_comp(arch: str):
         for os in os_list:
             for comp in component_list:
                 vardict = {
-                    'arch': arch,
-                    'os_ver': os,
-                    'comp': comp,
+                    "arch": arch,
+                    "os_ver": os,
+                    "comp": comp,
                 }
 
                 name = substitute_vars(args.repo_name, vardict)
                 url = substitute_vars(args.base_url, vardict)
                 try:
-                    probe_url = url + ('' if url.endswith('/') else '/') + "repodata/repomd.xml"
-                    r = requests.head(probe_url, timeout=(7,7))
+                    probe_url = (
+                        url + ("" if url.endswith("/") else "/") + "repodata/repomd.xml"
+                    )
+                    r = requests.head(probe_url, timeout=(7, 7))
                     if r.status_code < 400 or r.status_code == 403:
                         yield (name, url)
                     else:
@@ -195,19 +219,23 @@ def combination_os_comp(arch: str):
     for arch in arch_list:
         dest_dirs = []
         conf = tempfile.NamedTemporaryFile("w", suffix=".conf")
-        conf.write('''
+        conf.write(
+            """
 [main]
 keepcache=0
-''')
+"""
+        )
         for name, url in combination_os_comp(arch):
-            conf.write(f'''
+            conf.write(
+                f"""
 [{name}]
 name={name}
 baseurl={url}
 repo_gpgcheck=0
 gpgcheck=0
 enabled=1
-''')
+"""
+            )
             dst = (args.working_dir / name).absolute()
             dst.mkdir(parents=True, exist_ok=True)
             dest_dirs.append(dst)
@@ -217,13 +245,18 @@ def combination_os_comp(arch: str):
 
         if len(dest_dirs) == 0:
             print("Nothing to sync", flush=True)
-            failed.append(('', arch))
+            failed.append(("", arch))
             continue
 
         cmd_args = [
-            "dnf", "reposync",
-            "-c", conf.name,
-            "--delete", "-p", str(args.working_dir.absolute())]
+            "dnf",
+            "reposync",
+            "-c",
+            conf.name,
+            "--delete",
+            "-p",
+            str(args.working_dir.absolute()),
+        ]
         if args.pass_arch_to_reposync:
             cmd_args += ["--arch", arch]
         print(f"Launching dnf reposync with command: {cmd_args}", flush=True)
@@ -237,7 +270,16 @@ def combination_os_comp(arch: str):
             if args.download_repodata:
                 download_repodata(url, path)
             else:
-                cmd_args = ["createrepo_c", "--update", "-v", "-c", cache_dir, "-o", str(path), str(path)]
+                cmd_args = [
+                    "createrepo_c",
+                    "--update",
+                    "-v",
+                    "-c",
+                    cache_dir,
+                    "-o",
+                    str(path),
+                    str(path),
+                ]
                 print(f"Launching createrepo with command: {cmd_args}", flush=True)
                 ret = sp.run(cmd_args)
             calc_repo_size(path)
@@ -250,5 +292,6 @@ def combination_os_comp(arch: str):
                 total_size = sum([r[0] for r in REPO_STAT.values()])
                 fd.write(f"+{total_size}")
 
+
 if __name__ == "__main__":
     main()