From 5f4bc1c2608c7c152ff8dc439f160eb6dfc8a8b5 Mon Sep 17 00:00:00 2001 From: Harry Chen Date: Sat, 17 Aug 2024 13:33:31 +0800 Subject: [PATCH] Run formatter on some Python scripts Signed-off-by: Harry Chen --- adoptium.py | 8 +- anaconda.py | 156 ++++++++++++++++++----------- apt-sync.py | 238 +++++++++++++++++++++++++++++--------------- github-raw.py | 86 ++++++++++------ homebrew-bottles.py | 52 ++++++---- yum-sync.py | 167 +++++++++++++++++++------------ 6 files changed, 449 insertions(+), 258 deletions(-) diff --git a/adoptium.py b/adoptium.py index a51a07d..38dd196 100755 --- a/adoptium.py +++ b/adoptium.py @@ -1,17 +1,11 @@ #!/usr/bin/env python3 import hashlib -import traceback -import json import os -import re -import shutil import subprocess as sp -import tempfile -import argparse import time from email.utils import parsedate_to_datetime from pathlib import Path -from typing import List, Set, Tuple, IO +from typing import Set import requests DOWNLOAD_TIMEOUT = int(os.getenv('DOWNLOAD_TIMEOUT', '1800')) diff --git a/anaconda.py b/anaconda.py index fa2ed8d..c20878a 100755 --- a/anaconda.py +++ b/anaconda.py @@ -25,6 +25,7 @@ WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR") +# fmt: off CONDA_REPOS = ("main", "free", "r", "msys2") CONDA_ARCHES = ( "noarch", "linux-64", "linux-32", "linux-aarch64", "linux-armv6l", "linux-armv7l", @@ -72,6 +73,7 @@ EXCLUDED_PACKAGES = ( "pytorch-nightly", "pytorch-nightly-cpu", "ignite-nightly", ) +# fmt: on # connect and read timeout value TIMEOUT_OPTION = (7, 10) @@ -84,28 +86,31 @@ format="[%(asctime)s] [%(levelname)s] %(message)s", ) -def sizeof_fmt(num, suffix='iB'): - for unit in ['','K','M','G','T','P','E','Z']: + +def sizeof_fmt(num, suffix="iB"): + for unit in ["", "K", "M", "G", "T", "P", "E", "Z"]: if abs(num) < 1024.0: return "%3.2f%s%s" % (num, unit, suffix) num /= 1024.0 - return "%.2f%s%s" % (num, 'Y', suffix) + return "%.2f%s%s" % (num, "Y", suffix) + def md5_check(file: Path, md5: str = None): m = hashlib.md5() - with file.open('rb') as f: + with file.open("rb") as f: while True: - buf = f.read(1*1024*1024) + buf = f.read(1 * 1024 * 1024) if not buf: break m.update(buf) return m.hexdigest() == md5 + def sha256_check(file: Path, sha256: str = None): m = hashlib.sha256() - with file.open('rb') as f: + with file.open("rb") as f: while True: - buf = f.read(1*1024*1024) + buf = f.read(1 * 1024 * 1024) if not buf: break m.update(buf) @@ -113,34 +118,42 @@ def sha256_check(file: Path, sha256: str = None): def curl_download(remote_url: str, dst_file: Path, sha256: str = None, md5: str = None): - sp.check_call([ - "curl", "-o", str(dst_file), - "-sL", "--remote-time", "--show-error", - "--fail", "--retry", "10", "--speed-time", "15", - "--speed-limit", "5000", remote_url, - ]) + # fmt: off + sp.check_call( + [ + "curl", "-o", str(dst_file), + "-sL", "--remote-time", "--show-error", + "--fail", "--retry", "10", + "--speed-time", "15", + "--speed-limit", "5000", + remote_url, + ] + ) + # fmt: on if sha256 and (not sha256_check(dst_file, sha256)): return "SHA256 mismatch" if md5 and (not md5_check(dst_file, md5)): return "MD5 mismatch" -def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove_legacy: bool): +def sync_repo( + repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove_legacy: bool +): logging.info("Start syncing {}".format(repo_url)) local_dir.mkdir(parents=True, exist_ok=True) - repodata_url = repo_url + '/repodata.json' - bz2_repodata_url = repo_url + '/repodata.json.bz2' + repodata_url = repo_url + "/repodata.json" + bz2_repodata_url = repo_url + "/repodata.json.bz2" # https://github.com/conda/conda/issues/13256, from conda 24.1.x - zst_repodata_url = repo_url + '/repodata.json.zst' + zst_repodata_url = repo_url + "/repodata.json.zst" # https://docs.conda.io/projects/conda-build/en/latest/release-notes.html # "current_repodata.json" - like repodata.json, but only has the newest version of each file - current_repodata_url = repo_url + '/current_repodata.json' + current_repodata_url = repo_url + "/current_repodata.json" tmp_repodata = tmpdir / "repodata.json" tmp_bz2_repodata = tmpdir / "repodata.json.bz2" tmp_zst_repodata = tmpdir / "repodata.json.zst" - tmp_current_repodata = tmpdir / 'current_repodata.json' + tmp_current_repodata = tmpdir / "current_repodata.json" curl_download(repodata_url, tmp_repodata) curl_download(bz2_repodata_url, tmp_bz2_repodata) @@ -158,31 +171,33 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove remote_filelist = [] total_size = 0 - legacy_packages = repodata['packages'] + legacy_packages = repodata["packages"] conda_packages = repodata.get("packages.conda", {}) if remove_legacy: # https://github.com/anaconda/conda/blob/0dbf85e0546e0b0dc060c8265ec936591ccbe980/conda/core/subdir_data.py#L440-L442 - use_legacy_packages = set(legacy_packages.keys()) - set(k[:-6] + ".tar.bz2" for k in conda_packages.keys()) + use_legacy_packages = set(legacy_packages.keys()) - set( + k[:-6] + ".tar.bz2" for k in conda_packages.keys() + ) legacy_packages = {k: legacy_packages[k] for k in use_legacy_packages} packages = {**legacy_packages, **conda_packages} for filename, meta in packages.items(): - if meta['name'] in EXCLUDED_PACKAGES: + if meta["name"] in EXCLUDED_PACKAGES: continue - file_size = meta['size'] + file_size = meta["size"] # prefer sha256 over md5 sha256 = None md5 = None - if 'sha256' in meta: - sha256 = meta['sha256'] - elif 'md5' in meta: - md5 = meta['md5'] + if "sha256" in meta: + sha256 = meta["sha256"] + elif "md5" in meta: + md5 = meta["md5"] total_size += file_size - pkg_url = '/'.join([repo_url, filename]) + pkg_url = "/".join([repo_url, filename]) dst_file = local_dir / filename - dst_file_wip = local_dir / ('.downloading.' + filename) + dst_file_wip = local_dir / (".downloading." + filename) remote_filelist.append(dst_file) if dst_file.is_file(): @@ -202,7 +217,7 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove if err is None: dst_file_wip.rename(dst_file) except sp.CalledProcessError: - err = 'CalledProcessError' + err = "CalledProcessError" if err is None: break logging.error("Failed to download {}: {}".format(filename, err)) @@ -223,11 +238,15 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove tmp_current_repodata_gz_gened = False if tmp_current_repodata.is_file(): if os.path.getsize(tmp_current_repodata) > GEN_METADATA_JSON_GZIP_THRESHOLD: - sp.check_call(["gzip", "--no-name", "--keep", "--", str(tmp_current_repodata)]) - shutil.move(str(tmp_current_repodata) + ".gz", str(local_dir / "current_repodata.json.gz")) + sp.check_call( + ["gzip", "--no-name", "--keep", "--", str(tmp_current_repodata)] + ) + shutil.move( + str(tmp_current_repodata) + ".gz", + str(local_dir / "current_repodata.json.gz"), + ) tmp_current_repodata_gz_gened = True - shutil.move(str(tmp_current_repodata), str( - local_dir / "current_repodata.json")) + shutil.move(str(tmp_current_repodata), str(local_dir / "current_repodata.json")) if not tmp_current_repodata_gz_gened: # If the gzip file is not generated, remove the dangling gzip archive Path(local_dir / "current_repodata.json.gz").unlink(missing_ok=True) @@ -235,9 +254,9 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove if delete: local_filelist = [] delete_count = 0 - for i in local_dir.glob('*.tar.bz2'): + for i in local_dir.glob("*.tar.bz2"): local_filelist.append(i) - for i in local_dir.glob('*.conda'): + for i in local_dir.glob("*.conda"): local_filelist.append(i) for i in set(local_filelist) - set(remote_filelist): logging.info("Deleting {}".format(i)) @@ -245,46 +264,53 @@ def sync_repo(repo_url: str, local_dir: Path, tmpdir: Path, delete: bool, remove delete_count += 1 logging.info("{} files deleted".format(delete_count)) - logging.info("{}: {} files, {} in total".format( - repodata_url, len(remote_filelist), sizeof_fmt(total_size))) + logging.info( + "{}: {} files, {} in total".format( + repodata_url, len(remote_filelist), sizeof_fmt(total_size) + ) + ) return total_size + def sync_installer(repo_url, local_dir: Path): logging.info("Start syncing {}".format(repo_url)) local_dir.mkdir(parents=True, exist_ok=True) - full_scan = random.random() < 0.1 # Do full version check less frequently + full_scan = random.random() < 0.1 # Do full version check less frequently def remote_list(): r = requests.get(repo_url, timeout=TIMEOUT_OPTION) d = pq(r.content) - for tr in d('table').find('tr'): - tds = pq(tr).find('td') + for tr in d("table").find("tr"): + tds = pq(tr).find("td") if len(tds) != 4: continue - fname = tds[0].find('a').text + fname = tds[0].find("a").text sha256 = tds[3].text - if sha256 == '' or len(sha256) != 64: + if sha256 == "" or len(sha256) != 64: continue yield (fname, sha256) for filename, sha256 in remote_list(): pkg_url = "/".join([repo_url, filename]) dst_file = local_dir / filename - dst_file_wip = local_dir / ('.downloading.' + filename) + dst_file_wip = local_dir / (".downloading." + filename) if dst_file.is_file(): r = requests.head(pkg_url, allow_redirects=True, timeout=TIMEOUT_OPTION) - len_avail = 'content-length' in r.headers + len_avail = "content-length" in r.headers if len_avail: - remote_filesize = int(r.headers['content-length']) - remote_date = parsedate_to_datetime(r.headers['last-modified']) + remote_filesize = int(r.headers["content-length"]) + remote_date = parsedate_to_datetime(r.headers["last-modified"]) stat = dst_file.stat() local_filesize = stat.st_size local_mtime = stat.st_mtime # Do content verification on ~5% of files (see issue #25) - if (not len_avail or remote_filesize == local_filesize) and remote_date.timestamp() == local_mtime and \ - (random.random() < 0.95 or sha256_check(dst_file, sha256)): + if ( + (not len_avail or remote_filesize == local_filesize) + and remote_date.timestamp() == local_mtime + and (random.random() < 0.95 or sha256_check(dst_file, sha256)) + ): logging.info("Skipping {}".format(filename)) # Stop the scanning if the most recent version is present @@ -299,25 +325,31 @@ def remote_list(): for retry in range(3): logging.info("Downloading {}".format(filename)) - err = '' + err = "" try: err = curl_download(pkg_url, dst_file_wip, sha256=sha256) if err is None: dst_file_wip.rename(dst_file) except sp.CalledProcessError: - err = 'CalledProcessError' + err = "CalledProcessError" if err is None: break logging.error("Failed to download {}: {}".format(filename, err)) + def main(): import argparse + parser = argparse.ArgumentParser() parser.add_argument("--working-dir", default=WORKING_DIR) - parser.add_argument("--delete", action='store_true', - help='delete unreferenced package files') - parser.add_argument("--remove-legacy", action='store_true', - help='delete legacy packages which have conda counterpart. Requires client conda >= 4.7.0') + parser.add_argument( + "--delete", action="store_true", help="delete unreferenced package files" + ) + parser.add_argument( + "--remove-legacy", + action="store_true", + help="delete legacy packages which have conda counterpart. Requires client conda >= 4.7.0", + ) args = parser.parse_args() if args.working_dir is None: @@ -336,7 +368,8 @@ def main(): try: sync_installer(remote_url, local_dir) size_statistics += sum( - f.stat().st_size for f in local_dir.glob('*') if f.is_file()) + f.stat().st_size for f in local_dir.glob("*") if f.is_file() + ) except Exception: logging.exception("Failed to sync installers of {}".format(dist)) success = False @@ -348,8 +381,9 @@ def main(): tmpdir = tempfile.mkdtemp() try: - size_statistics += sync_repo(remote_url, - local_dir, Path(tmpdir), args.delete, args.remove_legacy) + size_statistics += sync_repo( + remote_url, local_dir, Path(tmpdir), args.delete, args.remove_legacy + ) except Exception: logging.exception("Failed to sync repo: {}/{}".format(repo, arch)) success = False @@ -362,8 +396,9 @@ def main(): tmpdir = tempfile.mkdtemp() try: - size_statistics += sync_repo(remote_url, - local_dir, Path(tmpdir), args.delete, args.remove_legacy) + size_statistics += sync_repo( + remote_url, local_dir, Path(tmpdir), args.delete, args.remove_legacy + ) except Exception: logging.exception("Failed to sync repo: {}".format(repo)) success = False @@ -374,6 +409,7 @@ def main(): if not success: sys.exit(1) + if __name__ == "__main__": main() diff --git a/apt-sync.py b/apt-sync.py index 5005798..b8b533e 100755 --- a/apt-sync.py +++ b/apt-sync.py @@ -4,7 +4,6 @@ import os import re import shutil -import subprocess as sp import argparse import bz2 import gzip @@ -23,21 +22,27 @@ # set preferred address family import requests.packages.urllib3.util.connection as urllib3_cn -USE_ADDR_FAMILY = os.getenv('USE_ADDR_FAMILY', '').strip().lower() -if USE_ADDR_FAMILY != '': - assert USE_ADDR_FAMILY in ['ipv4', 'ipv6'], "USE_ADDR_FAMILY must be either ipv4 or ipv6" - urllib3_cn.allowed_gai_family = lambda: socket.AF_INET if USE_ADDR_FAMILY == 'ipv4' else socket.AF_INET6 + +USE_ADDR_FAMILY = os.getenv("USE_ADDR_FAMILY", "").strip().lower() +if USE_ADDR_FAMILY != "": + assert USE_ADDR_FAMILY in [ + "ipv4", + "ipv6", + ], "USE_ADDR_FAMILY must be either ipv4 or ipv6" + urllib3_cn.allowed_gai_family = lambda: ( + socket.AF_INET if USE_ADDR_FAMILY == "ipv4" else socket.AF_INET6 + ) OS_TEMPLATE = { - 'ubuntu-lts': ["focal", "jammy", "noble"], - 'debian-current': ["bullseye", "bookworm"], - 'debian-latest2': ["bullseye", "bookworm"], - 'debian-latest': ["bookworm"], + "ubuntu-lts": ["focal", "jammy", "noble"], + "debian-current": ["bullseye", "bookworm"], + "debian-latest2": ["bullseye", "bookworm"], + "debian-latest": ["bookworm"], } -ARCH_NO_PKGIDX = ['dep11', 'i18n', 'cnf'] -MAX_RETRY=int(os.getenv('MAX_RETRY', '3')) -DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800')) -REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '') +ARCH_NO_PKGIDX = ["dep11", "i18n", "cnf"] +MAX_RETRY = int(os.getenv("MAX_RETRY", "3")) +DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800")) +REPO_SIZE_FILE = os.getenv("REPO_SIZE_FILE", "") pattern_os_template = re.compile(r"@\{(.+)\}") pattern_package_name = re.compile(r"^Filename: (.+)$", re.MULTILINE) @@ -45,11 +50,13 @@ pattern_package_sha256 = re.compile(r"^SHA256: (\w{64})$", re.MULTILINE) download_cache = dict() + def check_args(prop: str, lst: List[str]): for s in lst: - if len(s)==0 or ' ' in s: + if len(s) == 0 or " " in s: raise ValueError(f"Invalid item in {prop}: {repr(s)}") + def replace_os_template(os_list: List[str]) -> List[str]: ret = [] for i in os_list: @@ -57,103 +64,137 @@ def replace_os_template(os_list: List[str]) -> List[str]: if matched: for os in OS_TEMPLATE[matched.group(1)]: ret.append(pattern_os_template.sub(os, i)) - elif i.startswith('@'): + elif i.startswith("@"): ret.extend(OS_TEMPLATE[i[1:]]) else: ret.append(i) return ret -def check_and_download(url: str, dst_file: Path, caching = False)->int: + +def check_and_download(url: str, dst_file: Path, caching=False) -> int: try: if caching: if url in download_cache: print(f"Using cached content: {url}", flush=True) - with dst_file.open('wb') as f: + with dst_file.open("wb") as f: f.write(download_cache[url]) return 0 download_cache[url] = bytes() start = time.time() with requests.get(url, stream=True, timeout=(5, 10)) as r: r.raise_for_status() - if 'last-modified' in r.headers: + if "last-modified" in r.headers: remote_ts = parsedate_to_datetime( - r.headers['last-modified']).timestamp() - else: remote_ts = None + r.headers["last-modified"] + ).timestamp() + else: + remote_ts = None - with dst_file.open('wb') as f: + with dst_file.open("wb") as f: for chunk in r.iter_content(chunk_size=1024**2): if time.time() - start > DOWNLOAD_TIMEOUT: raise TimeoutError("Download timeout") - if not chunk: continue # filter out keep-alive new chunks + if not chunk: + continue # filter out keep-alive new chunks f.write(chunk) - if caching: download_cache[url] += chunk + if caching: + download_cache[url] += chunk if remote_ts is not None: os.utime(dst_file, (remote_ts, remote_ts)) return 0 except BaseException as e: print(e, flush=True) - if dst_file.is_file(): dst_file.unlink() - if url in download_cache: del download_cache[url] + if dst_file.is_file(): + dst_file.unlink() + if url in download_cache: + del download_cache[url] return 1 -def mkdir_with_dot_tmp(folder: Path)->Tuple[Path, Path]: + +def mkdir_with_dot_tmp(folder: Path) -> Tuple[Path, Path]: tmpdir = folder / ".tmp" if tmpdir.is_dir(): shutil.rmtree(str(tmpdir)) tmpdir.mkdir(parents=True, exist_ok=True) return (folder, tmpdir) + def move_files_in(src: Path, dst: Path): empty = True - for file in src.glob('*'): + for file in src.glob("*"): empty = False print(f"moving {file} to {dst}") # shutil.move(str(file), str(dst)) if file.is_dir(): (dst / file.name).mkdir(parents=True, exist_ok=True) move_files_in(file, dst / file.name) - file.rmdir() # rmdir wont fail as all files in it have been moved + file.rmdir() # rmdir wont fail as all files in it have been moved else: - file.rename(dst / file.name) # Overwrite files + file.rename(dst / file.name) # Overwrite files if empty: print(f"{src} is empty") -def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Path, deb_set: Dict[str, int])->int: + +def apt_mirror( + base_url: str, + dist: str, + repo: str, + arch: str, + dest_base_dir: Path, + deb_set: Dict[str, int], +) -> int: if not dest_base_dir.is_dir(): print("Destination directory is empty, cannot continue") return 1 print(f"Started mirroring {base_url} {dist}, {repo}, {arch}!", flush=True) - # download Release files - dist_dir,dist_tmp_dir = mkdir_with_dot_tmp(dest_base_dir / "dists" / dist) - check_and_download(f"{base_url}/dists/{dist}/InRelease",dist_tmp_dir / "InRelease", caching=True) - if check_and_download(f"{base_url}/dists/{dist}/Release",dist_tmp_dir / "Release", caching=True) != 0: + # download Release files + dist_dir, dist_tmp_dir = mkdir_with_dot_tmp(dest_base_dir / "dists" / dist) + check_and_download( + f"{base_url}/dists/{dist}/InRelease", dist_tmp_dir / "InRelease", caching=True + ) + if ( + check_and_download( + f"{base_url}/dists/{dist}/Release", dist_tmp_dir / "Release", caching=True + ) + != 0 + ): print("Invalid Repository") - if not (dist_dir/"Release").is_file(): - print(f"{dist_dir/'Release'} never existed, upstream may not provide packages for {dist}, ignore this error") + if not (dist_dir / "Release").is_file(): + print( + f"{dist_dir/'Release'} never existed, upstream may not provide packages for {dist}, ignore this error" + ) return 0 return 1 - check_and_download(f"{base_url}/dists/{dist}/Release.gpg",dist_tmp_dir / "Release.gpg", caching=True) + check_and_download( + f"{base_url}/dists/{dist}/Release.gpg", + dist_tmp_dir / "Release.gpg", + caching=True, + ) - comp_dir,comp_tmp_dir = mkdir_with_dot_tmp(dist_dir / repo) + comp_dir, comp_tmp_dir = mkdir_with_dot_tmp(dist_dir / repo) - # load Package Index URLs from the Release file + # load Package Index URLs from the Release file release_file = dist_tmp_dir / "Release" arch_dir = arch if arch in ARCH_NO_PKGIDX else f"binary-{arch}" - pkgidx_dir,pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir) + pkgidx_dir, pkgidx_tmp_dir = mkdir_with_dot_tmp(comp_dir / arch_dir) with open(release_file, "r") as fd: - pkgidx_content=None - cnt_start=False + pkgidx_content = None + cnt_start = False for line in fd: if cnt_start: fields = line.split() - if len(fields) != 3 or len(fields[0]) != 64: # 64 is SHA-256 checksum length + if ( + len(fields) != 3 or len(fields[0]) != 64 + ): # 64 is SHA-256 checksum length break checksum, filesize, filename = tuple(fields) - if filename.startswith(f"{repo}/{arch_dir}/") or \ - filename.startswith(f"{repo}/Contents-{arch}") or \ - filename.startswith(f"Contents-{arch}"): + if ( + filename.startswith(f"{repo}/{arch_dir}/") + or filename.startswith(f"{repo}/Contents-{arch}") + or filename.startswith(f"Contents-{arch}") + ): fn = Path(filename) if len(fn.parts) <= 3: # Contents-amd64.gz @@ -163,7 +204,13 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa else: # main/dep11/by-hash/MD5Sum/0af5c69679a24671cfd7579095a9cb5e # deep_tmp_dir is in pkgidx_tmp_dir hence no extra garbage collection needed - deep_tmp_dir = dist_dir / Path(fn.parts[0]) / Path(fn.parts[1]) / ".tmp" / Path('/'.join(fn.parts[2:-1])) + deep_tmp_dir = ( + dist_dir + / Path(fn.parts[0]) + / Path(fn.parts[1]) + / ".tmp" + / Path("/".join(fn.parts[2:-1])) + ) deep_tmp_dir.mkdir(parents=True, exist_ok=True) pkgidx_file = deep_tmp_dir / fn.name else: @@ -174,33 +221,41 @@ def apt_mirror(base_url: str, dist: str, repo: str, arch: str, dest_base_dir: Pa print("Failed to download:", pkglist_url) continue - with pkgidx_file.open('rb') as t: content = t.read() + with pkgidx_file.open("rb") as t: + content = t.read() if len(content) != int(filesize): - print(f"Invalid size of {pkgidx_file}, expected {filesize}, skipped") + print( + f"Invalid size of {pkgidx_file}, expected {filesize}, skipped" + ) pkgidx_file.unlink() continue if hashlib.sha256(content).hexdigest() != checksum: - print(f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped") + print( + f"Invalid checksum of {pkgidx_file}, expected {checksum}, skipped" + ) pkgidx_file.unlink() continue - if pkgidx_content is None and pkgidx_file.stem == 'Packages': - print(f"getting packages index content from {pkgidx_file.name}", flush=True) + if pkgidx_content is None and pkgidx_file.stem == "Packages": + print( + f"getting packages index content from {pkgidx_file.name}", + flush=True, + ) suffix = pkgidx_file.suffix - if suffix == '.xz': - pkgidx_content = lzma.decompress(content).decode('utf-8') - elif suffix == '.bz2': - pkgidx_content = bz2.decompress(content).decode('utf-8') - elif suffix == '.gz': - pkgidx_content = gzip.decompress(content).decode('utf-8') - elif suffix == '': - pkgidx_content = content.decode('utf-8') + if suffix == ".xz": + pkgidx_content = lzma.decompress(content).decode("utf-8") + elif suffix == ".bz2": + pkgidx_content = bz2.decompress(content).decode("utf-8") + elif suffix == ".gz": + pkgidx_content = gzip.decompress(content).decode("utf-8") + elif suffix == "": + pkgidx_content = content.decode("utf-8") else: print("unsupported format") # Currently only support SHA-256 checksum, because # "Clients may not use the MD5Sum and SHA1 fields for security purposes, and must require a SHA256 or a SHA512 field." # from https://wiki.debian.org/DebianRepository/Format#A.22Release.22_files - if line.startswith('SHA256:'): + if line.startswith("SHA256:"): cnt_start = True if not cnt_start: print("Cannot find SHA-256 checksum") @@ -219,6 +274,7 @@ def collect_tmp_dir(): except: traceback.print_exc() return 1 + if arch in ARCH_NO_PKGIDX: if collect_tmp_dir() == 1: return 1 @@ -227,8 +283,10 @@ def collect_tmp_dir(): if pkgidx_content is None: print("index is empty, failed") - if len(list(pkgidx_dir.glob('Packages*'))) == 0: - print(f"{pkgidx_dir/'Packages'} never existed, upstream may not provide {dist}/{repo}/{arch}, ignore this error") + if len(list(pkgidx_dir.glob("Packages*"))) == 0: + print( + f"{pkgidx_dir/'Packages'} never existed, upstream may not provide {dist}/{repo}/{arch}, ignore this error" + ) return 0 return 1 @@ -236,8 +294,8 @@ def collect_tmp_dir(): err = 0 deb_count = 0 deb_size = 0 - for pkg in pkgidx_content.split('\n\n'): - if len(pkg) < 10: # ignore blanks + for pkg in pkgidx_content.split("\n\n"): + if len(pkg) < 10: # ignore blanks continue try: pkg_filename = pattern_package_name.search(pkg).group(1) @@ -255,14 +313,14 @@ def collect_tmp_dir(): dest_dir = dest_filename.parent if not dest_dir.is_dir(): dest_dir.mkdir(parents=True, exist_ok=True) - if dest_filename.suffix == '.deb': + if dest_filename.suffix == ".deb": deb_set[str(dest_filename.relative_to(dest_base_dir))] = pkg_size if dest_filename.is_file() and dest_filename.stat().st_size == pkg_size: print(f"Skipping {pkg_filename}, size {pkg_size}") continue - pkg_url=f"{base_url}/{pkg_filename}" - dest_tmp_filename = dest_filename.with_name('._syncing_.' + dest_filename.name) + pkg_url = f"{base_url}/{pkg_filename}" + dest_tmp_filename = dest_filename.with_name("._syncing_." + dest_filename.name) for retry in range(MAX_RETRY): print(f"downloading {pkg_url} to {dest_filename}", flush=True) # break # dry run @@ -289,19 +347,25 @@ def collect_tmp_dir(): print(f"{deb_count} packages, {deb_size} bytes in total", flush=True) return err + def apt_delete_old_debs(dest_base_dir: Path, remote_set: Dict[str, int], dry_run: bool): - on_disk = set([ - str(i.relative_to(dest_base_dir)) for i in dest_base_dir.glob('**/*.deb')]) + on_disk = set( + [str(i.relative_to(dest_base_dir)) for i in dest_base_dir.glob("**/*.deb")] + ) deleting = on_disk - remote_set.keys() # print(on_disk) # print(remote_set) - print(f"Deleting {len(deleting)} packages not in the index{' (dry run)' if dry_run else ''}", flush=True) + print( + f"Deleting {len(deleting)} packages not in the index{' (dry run)' if dry_run else ''}", + flush=True, + ) for i in deleting: if dry_run: print("Will delete", i) else: print("Deleting", i) - (dest_base_dir/i).unlink() + (dest_base_dir / i).unlink() + def main(): @@ -311,31 +375,35 @@ def main(): parser.add_argument("component", type=str, help="e.g. multiverse,contrib") parser.add_argument("arch", type=str, help="e.g. i386,amd64") parser.add_argument("working_dir", type=Path, help="working directory") - parser.add_argument("--delete", action='store_true', - help='delete unreferenced package files') - parser.add_argument("--delete-dry-run", action='store_true', - help='print package files to be deleted only') + parser.add_argument( + "--delete", action="store_true", help="delete unreferenced package files" + ) + parser.add_argument( + "--delete-dry-run", + action="store_true", + help="print package files to be deleted only", + ) args = parser.parse_args() # generate lists of os codenames - os_list = args.os_version.split(',') + os_list = args.os_version.split(",") check_args("os_version", os_list) os_list = replace_os_template(os_list) # generate a list of components and archs for each os codename def generate_list_for_oses(raw: str, name: str) -> List[List[str]]: n_os = len(os_list) - if ':' in raw: + if ":" in raw: # specify os codenames for each component lists = [] - for l in raw.split(':'): - list_for_os = l.split(',') + for l in raw.split(":"): + list_for_os = l.split(",") check_args(name, list_for_os) lists.append(list_for_os) assert len(lists) == n_os, f"{name} must be specified for each component" else: # use same os codenames for all components - l = raw.split(',') + l = raw.split(",") check_args(name, l) lists = [l] * n_os return lists @@ -350,7 +418,12 @@ def generate_list_for_oses(raw: str, name: str) -> List[List[str]]: for os, arch_list, comp_list in zip(os_list, arch_lists, component_lists): for comp in comp_list: for arch in arch_list: - if apt_mirror(args.base_url, os, comp, arch, args.working_dir, deb_set=deb_set) != 0: + if ( + apt_mirror( + args.base_url, os, comp, arch, args.working_dir, deb_set=deb_set + ) + != 0 + ): failed.append((os, comp, arch)) if len(failed) > 0: print(f"Failed APT repos of {args.base_url}: ", failed) @@ -363,5 +436,6 @@ def generate_list_for_oses(raw: str, name: str) -> List[List[str]]: total_size = sum(deb_set.values()) fd.write(f"+{total_size}") + if __name__ == "__main__": main() diff --git a/github-raw.py b/github-raw.py index 17f7e88..4c31fb9 100755 --- a/github-raw.py +++ b/github-raw.py @@ -1,36 +1,46 @@ #!/usr/bin/env python3 import os -import sys import threading -import traceback import queue from pathlib import Path -from datetime import datetime import tempfile -import hashlib import requests BASE_URL = os.getenv("TUNASYNC_UPSTREAM_URL", "https://api.github.com/repos/") WORKING_DIR = os.getenv("TUNASYNC_WORKING_DIR") -MIRROR_BASE_URL = os.getenv("MIRROR_BASE_URL", 'https://mirrors.tuna.tsinghua.edu.cn/github-raw/') +MIRROR_BASE_URL = os.getenv( + "MIRROR_BASE_URL", "https://mirrors.tuna.tsinghua.edu.cn/github-raw/" +) + def raw_to_mirror(s: str) -> str: - return s.replace("https://raw.githubusercontent.com/", - MIRROR_BASE_URL) + return s.replace("https://raw.githubusercontent.com/", MIRROR_BASE_URL) + def delete_line_with(w: str, s: str) -> str: return "\n".join(list(filter(lambda x: x.count(w) == 0, s.splitlines()))) + def delete_line_with_gbpdistro(s: str) -> str: return delete_line_with("gbpdistro", s) + REPOS = [ # owner/repo, tree, tree, tree, blob ## for stackage ["fpco/stackage-content", "master", "stack", "global-hints.yaml"], ## for rosdep - { "path": ["ros/rosdistro", "master", "rosdep", "sources.list.d", "20-default.list"], "filter": [ raw_to_mirror, delete_line_with_gbpdistro ] }, + { + "path": [ + "ros/rosdistro", + "master", + "rosdep", + "sources.list.d", + "20-default.list", + ], + "filter": [raw_to_mirror, delete_line_with_gbpdistro], + }, ["ros/rosdistro", "master", "rosdep", "osx-homebrew.yaml"], ["ros/rosdistro", "master", "rosdep", "base.yaml"], ["ros/rosdistro", "master", "rosdep", "python.yaml"], @@ -44,36 +54,46 @@ def delete_line_with_gbpdistro(s: str) -> str: TIMEOUT_OPTION = (7, 10) total_size = 0 + # wrap around requests.get to use token if available def github_get(*args, **kwargs): - headers = kwargs['headers'] if 'headers' in kwargs else {} - if 'GITHUB_TOKEN' in os.environ: - headers['Authorization'] = 'token {}'.format( - os.environ['GITHUB_TOKEN']) - kwargs['headers'] = headers + headers = kwargs["headers"] if "headers" in kwargs else {} + if "GITHUB_TOKEN" in os.environ: + headers["Authorization"] = "token {}".format(os.environ["GITHUB_TOKEN"]) + kwargs["headers"] = headers return requests.get(*args, **kwargs) + def github_tree(*args, **kwargs): - headers = kwargs['headers'] if 'headers' in kwargs else {} + headers = kwargs["headers"] if "headers" in kwargs else {} headers["Accept"] = "application/vnd.github.v3+json" - kwargs['headers'] = headers + kwargs["headers"] = headers return github_get(*args, **kwargs) + # NOTE blob API supports file up to 100MB # To get larger one, we need raw.githubcontent, which is not implemented now def github_blob(*args, **kwargs): - headers = kwargs['headers'] if 'headers' in kwargs else {} + headers = kwargs["headers"] if "headers" in kwargs else {} headers["Accept"] = "application/vnd.github.v3.raw" - kwargs['headers'] = headers + kwargs["headers"] = headers return github_get(*args, **kwargs) -def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, filter=None): + +def do_download( + remote_url: str, dst_file: Path, remote_size: int, sha: str, filter=None +): # NOTE the stream=True parameter below with github_blob(remote_url, stream=True) as r: r.raise_for_status() tmp_dst_file = None try: - with tempfile.NamedTemporaryFile(prefix="." + dst_file.name + ".", suffix=".tmp", dir=dst_file.parent, delete=False) as f: + with tempfile.NamedTemporaryFile( + prefix="." + dst_file.name + ".", + suffix=".tmp", + dir=dst_file.parent, + delete=False, + ) as f: tmp_dst_file = Path(f.name) for chunk in r.iter_content(chunk_size=1024**2): if chunk: # filter out keep-alive new chunks @@ -82,7 +102,9 @@ def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, fil # check for downloaded size downloaded_size = tmp_dst_file.stat().st_size if remote_size != -1 and downloaded_size != remote_size: - raise Exception(f'File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes') + raise Exception( + f"File {dst_file.as_posix()} size mismatch: downloaded {downloaded_size} bytes, expected {remote_size} bytes" + ) if filter != None: with open(tmp_dst_file, "r+") as f: s = f.read() @@ -108,25 +130,26 @@ def do_download(remote_url: str, dst_file: Path, remote_size: int, sha: str, fil if tmp_dst_file.is_file(): tmp_dst_file.unlink() + def downloading_worker(q): while True: item = q.get() if item is None: break - filter = item.pop(0) # remove filter + filter = item.pop(0) # remove filter - dst_file = Path('/'.join(item)) + dst_file = Path("/".join(item)) dst_file.parent.mkdir(parents=True, exist_ok=True) - item.pop(0) # remove working dir + item.pop(0) # remove working dir owner_repo = item.pop(0) try: tree = item.pop(0) tree_child = item.pop(0) child_is_leaf = False - url = '' - sha = '' + url = "" + sha = "" size = 0 while not child_is_leaf: with github_tree(f"{BASE_URL}{owner_repo}/git/trees/{tree}") as r: @@ -147,8 +170,7 @@ def downloading_worker(q): break else: raise Exception - if not dst_file.is_symlink() or \ - Path(os.readlink(dst_file)).name != sha: + if not dst_file.is_symlink() or Path(os.readlink(dst_file)).name != sha: do_download(url, dst_file, size, sha, filter) else: print("Skip", dst_file) @@ -164,16 +186,19 @@ def downloading_worker(q): def create_workers(n): task_queue = queue.Queue() for i in range(n): - t = threading.Thread(target=downloading_worker, args=(task_queue, )) + t = threading.Thread(target=downloading_worker, args=(task_queue,)) t.start() return task_queue + def main(): import argparse + parser = argparse.ArgumentParser() parser.add_argument("--working-dir", default=WORKING_DIR) - parser.add_argument("--workers", default=1, type=int, - help='number of concurrent downloading jobs') + parser.add_argument( + "--workers", default=1, type=int, help="number of concurrent downloading jobs" + ) args = parser.parse_args() if args.working_dir is None: @@ -198,6 +223,7 @@ def main(): for i in range(args.workers): task_queue.put(None) + if __name__ == "__main__": main() diff --git a/homebrew-bottles.py b/homebrew-bottles.py index 8007704..0fc46a7 100755 --- a/homebrew-bottles.py +++ b/homebrew-bottles.py @@ -10,25 +10,30 @@ # mainly from apt-sync.py -FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API = os.getenv("TUNASYNC_UPSTREAM_URL", "https://api.github.com/repos/Homebrew/formulae.brew.sh/actions/artifacts?name=github-pages") +FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API = os.getenv( + "TUNASYNC_UPSTREAM_URL", + "https://api.github.com/repos/Homebrew/formulae.brew.sh/actions/artifacts?name=github-pages", +) WORKING_DIR = Path(os.getenv("TUNASYNC_WORKING_DIR", "/data")) -DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800')) +DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800")) github_api_headers = { "Accept": "application/vnd.github+json", "X-GitHub-Api-Version": "2022-11-28", } -if 'GITHUB_TOKEN' in os.environ: - github_api_headers['Authorization'] = 'token {}'.format( - os.environ['GITHUB_TOKEN']) +if "GITHUB_TOKEN" in os.environ: + github_api_headers["Authorization"] = "token {}".format(os.environ["GITHUB_TOKEN"]) else: # https://github.com/actions/upload-artifact/issues/51 # the token should have 'public_repo' access raise Exception("GITHUB_TOKEN is required") + def formulae_github_pages(zip_file: Path, unzip_directory: Path, tar_directory: Path): - artifacts = requests.get(FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API, headers=github_api_headers) + artifacts = requests.get( + FORMULAE_BREW_SH_GITHUB_ACTIONS_ARTIFACT_API, headers=github_api_headers + ) artifacts.raise_for_status() artifacts = artifacts.json() latest = None @@ -40,7 +45,10 @@ def formulae_github_pages(zip_file: Path, unzip_directory: Path, tar_directory: check_and_download(zip_url, zip_file, zip_file, github_api_headers) sp.run(["unzip", str(zip_file), "-d", str(unzip_directory)]) - sp.run(["tar", "-C", str(tar_directory), "-xf", str(unzip_directory / "artifact.tar")]) + sp.run( + ["tar", "-C", str(tar_directory), "-xf", str(unzip_directory / "artifact.tar")] + ) + def bottles(formula_file: Path): b = {} @@ -49,7 +57,7 @@ def bottles(formula_file: Path): for formula in formulae: if formula["versions"]["bottle"] and "stable" in formula["bottle"]: bs = formula["bottle"]["stable"] - for (platform, v) in bs["files"].items(): + for platform, v in bs["files"].items(): sha256 = v["sha256"] url = v["url"] name = formula["name"] @@ -63,28 +71,36 @@ def bottles(formula_file: Path): } return b + ghcr_headers = { "Accept": "application/vnd.oci.image.index.v1+json", - "Authorization": "Bearer QQ==" + "Authorization": "Bearer QQ==", } + # borrowed from apt-sync.py -def check_and_download(url: str, dst_file: Path, dst_tmp_file: Path, headers=ghcr_headers): - if dst_file.is_file(): return 2 # old file +def check_and_download( + url: str, dst_file: Path, dst_tmp_file: Path, headers=ghcr_headers +): + if dst_file.is_file(): + return 2 # old file try: start = time.time() with requests.get(url, stream=True, timeout=(5, 10), headers=headers) as r: r.raise_for_status() - if 'last-modified' in r.headers: + if "last-modified" in r.headers: remote_ts = parsedate_to_datetime( - r.headers['last-modified']).timestamp() - else: remote_ts = None + r.headers["last-modified"] + ).timestamp() + else: + remote_ts = None - with dst_tmp_file.open('wb') as f: + with dst_tmp_file.open("wb") as f: for chunk in r.iter_content(chunk_size=1024**2): if time.time() - start > DOWNLOAD_TIMEOUT: raise TimeoutError("Download timeout") - if not chunk: continue # filter out keep-alive new chunks + if not chunk: + continue # filter out keep-alive new chunks f.write(chunk) if remote_ts is not None: @@ -92,9 +108,11 @@ def check_and_download(url: str, dst_file: Path, dst_tmp_file: Path, headers=ghc return 0 except BaseException as e: print(e, flush=True) - if dst_tmp_file.is_file(): dst_tmp_file.unlink() + if dst_tmp_file.is_file(): + dst_tmp_file.unlink() return 1 + if __name__ == "__main__": # clean tmp file from previous sync TMP_DIR = WORKING_DIR / ".tmp" diff --git a/yum-sync.py b/yum-sync.py index 085a48e..cf15959 100755 --- a/yum-sync.py +++ b/yum-sync.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 import traceback import os -import sys import subprocess as sp import tempfile import argparse @@ -16,47 +15,50 @@ from typing import List, Dict import requests -REPO_SIZE_FILE = os.getenv('REPO_SIZE_FILE', '') -DOWNLOAD_TIMEOUT=int(os.getenv('DOWNLOAD_TIMEOUT', '1800')) +REPO_SIZE_FILE = os.getenv("REPO_SIZE_FILE", "") +DOWNLOAD_TIMEOUT = int(os.getenv("DOWNLOAD_TIMEOUT", "1800")) REPO_STAT = {} + def calc_repo_size(path: Path): - dbfiles = path.glob('repodata/*primary.*') + dbfiles = path.glob("repodata/*primary.*") with tempfile.NamedTemporaryFile() as tmp: dec = None dbfile = None for db in dbfiles: dbfile = db suffixes = db.suffixes - if suffixes[-1] == '.bz2': + if suffixes[-1] == ".bz2": dec = bz2.decompress suffixes = suffixes[:-1] - elif suffixes[-1] == '.gz': + elif suffixes[-1] == ".gz": dec = gzip.decompress suffixes = suffixes[:-1] - elif suffixes[-1] in ('.sqlite', '.xml'): + elif suffixes[-1] in (".sqlite", ".xml"): dec = lambda x: x if dec is None: print(f"Failed to read from {path}: {list(dbfiles)}", flush=True) return - with db.open('rb') as f: + with db.open("rb") as f: tmp.write(dec(f.read())) tmp.flush() - if suffixes[-1] == '.sqlite': + if suffixes[-1] == ".sqlite": conn = sqlite3.connect(tmp.name) c = conn.cursor() c.execute("select sum(size_package),count(1) from packages") size, cnt = c.fetchone() conn.close() - elif suffixes[-1] == '.xml': + elif suffixes[-1] == ".xml": try: tree = ET.parse(tmp.name) root = tree.getroot() - assert root.tag.endswith('metadata') + assert root.tag.endswith("metadata") cnt, size = 0, 0 - for location in root.findall('./{http://linux.duke.edu/metadata/common}package/{http://linux.duke.edu/metadata/common}size'): - size += int(location.attrib['package']) + for location in root.findall( + "./{http://linux.duke.edu/metadata/common}package/{http://linux.duke.edu/metadata/common}size" + ): + size += int(location.attrib["package"]) cnt += 1 except: traceback.print_exc() @@ -69,23 +71,27 @@ def calc_repo_size(path: Path): print(f" {cnt} packages, {size} bytes in total", flush=True) global REPO_STAT - REPO_STAT[str(path)] = (size, cnt) if cnt > 0 else (0, 0) # size can be None + REPO_STAT[str(path)] = (size, cnt) if cnt > 0 else (0, 0) # size can be None + -def check_and_download(url: str, dst_file: Path)->int: +def check_and_download(url: str, dst_file: Path) -> int: try: start = time.time() with requests.get(url, stream=True, timeout=(5, 10)) as r: r.raise_for_status() - if 'last-modified' in r.headers: + if "last-modified" in r.headers: remote_ts = parsedate_to_datetime( - r.headers['last-modified']).timestamp() - else: remote_ts = None + r.headers["last-modified"] + ).timestamp() + else: + remote_ts = None - with dst_file.open('wb') as f: + with dst_file.open("wb") as f: for chunk in r.iter_content(chunk_size=1024**2): if time.time() - start > DOWNLOAD_TIMEOUT: raise TimeoutError("Download timeout") - if not chunk: continue # filter out keep-alive new chunks + if not chunk: + continue # filter out keep-alive new chunks f.write(chunk) if remote_ts is not None: @@ -93,13 +99,15 @@ def check_and_download(url: str, dst_file: Path)->int: return 0 except BaseException as e: print(e, flush=True) - if dst_file.is_file(): dst_file.unlink() + if dst_file.is_file(): + dst_file.unlink() return 1 + def download_repodata(url: str, path: Path) -> int: path = path / "repodata" path.mkdir(exist_ok=True) - oldfiles = set(path.glob('*.*')) + oldfiles = set(path.glob("*.*")) newfiles = set() if check_and_download(url + "/repodata/repomd.xml", path / ".repomd.xml") != 0: print(f"Failed to download the repomd.xml of {url}") @@ -107,64 +115,78 @@ def download_repodata(url: str, path: Path) -> int: try: tree = ET.parse(path / ".repomd.xml") root = tree.getroot() - assert root.tag.endswith('repomd') - for location in root.findall('./{http://linux.duke.edu/metadata/repo}data/{http://linux.duke.edu/metadata/repo}location'): - href = location.attrib['href'] - assert len(href) > 9 and href[:9] == 'repodata/' - fn = path / href[9:] - newfiles.add(fn) - if check_and_download(url + '/' + href, fn) != 0: - print(f"Failed to download the {href}") - return 1 + assert root.tag.endswith("repomd") + for location in root.findall( + "./{http://linux.duke.edu/metadata/repo}data/{http://linux.duke.edu/metadata/repo}location" + ): + href = location.attrib["href"] + assert len(href) > 9 and href[:9] == "repodata/" + fn = path / href[9:] + newfiles.add(fn) + if check_and_download(url + "/" + href, fn) != 0: + print(f"Failed to download the {href}") + return 1 except BaseException as e: traceback.print_exc() return 1 - (path / ".repomd.xml").rename(path / "repomd.xml") # update the repomd.xml + (path / ".repomd.xml").rename(path / "repomd.xml") # update the repomd.xml newfiles.add(path / "repomd.xml") - for i in (oldfiles - newfiles): + for i in oldfiles - newfiles: print(f"Deleting old files: {i}") i.unlink() + def check_args(prop: str, lst: List[str]): for s in lst: - if len(s)==0 or ' ' in s: + if len(s) == 0 or " " in s: raise ValueError(f"Invalid item in {prop}: {repr(s)}") + def substitute_vars(s: str, vardict: Dict[str, str]) -> str: for key, val in vardict.items(): - tpl = "@{"+key+"}" + tpl = "@{" + key + "}" s = s.replace(tpl, val) return s + def main(): parser = argparse.ArgumentParser() parser.add_argument("base_url", type=str, help="base URL") parser.add_argument("os_version", type=str, help="e.g. 7-8,9") - parser.add_argument("component", type=str, help="e.g. mysql56-community,mysql57-community") + parser.add_argument( + "component", type=str, help="e.g. mysql56-community,mysql57-community" + ) parser.add_argument("arch", type=str, help="e.g. x86_64,aarch64") parser.add_argument("repo_name", type=str, help="e.g. @{comp}-el@{os_ver}") parser.add_argument("working_dir", type=Path, help="working directory") - parser.add_argument("--download-repodata", action='store_true', - help='download repodata files instead of generating them') - parser.add_argument("--pass-arch-to-reposync", action='store_true', - help='''pass --arch to reposync to further filter packages by 'arch' field in metadata (NOT recommended, prone to missing packages in some repositories, e.g. mysql)''') + parser.add_argument( + "--download-repodata", + action="store_true", + help="download repodata files instead of generating them", + ) + parser.add_argument( + "--pass-arch-to-reposync", + action="store_true", + help="""pass --arch to reposync to further filter packages by 'arch' field in metadata (NOT recommended, prone to missing packages in some repositories, e.g. mysql)""", + ) args = parser.parse_args() os_list = [] - for os_version in args.os_version.split(','): - if '-' in os_version and '-stream' not in os_version: - dash = os_version.index('-') - os_list = os_list + [ str(i) for i in range( - int(os_version[:dash]), - 1+int(os_version[dash+1:])) ] + for os_version in args.os_version.split(","): + if "-" in os_version and "-stream" not in os_version: + dash = os_version.index("-") + os_list = os_list + [ + str(i) + for i in range(int(os_version[:dash]), 1 + int(os_version[dash + 1 :])) + ] else: os_list.append(os_version) check_args("os_version", os_list) - component_list = args.component.split(',') + component_list = args.component.split(",") check_args("component", component_list) - arch_list = args.arch.split(',') + arch_list = args.arch.split(",") check_args("arch", arch_list) failed = [] @@ -175,16 +197,18 @@ def combination_os_comp(arch: str): for os in os_list: for comp in component_list: vardict = { - 'arch': arch, - 'os_ver': os, - 'comp': comp, + "arch": arch, + "os_ver": os, + "comp": comp, } name = substitute_vars(args.repo_name, vardict) url = substitute_vars(args.base_url, vardict) try: - probe_url = url + ('' if url.endswith('/') else '/') + "repodata/repomd.xml" - r = requests.head(probe_url, timeout=(7,7)) + probe_url = ( + url + ("" if url.endswith("/") else "/") + "repodata/repomd.xml" + ) + r = requests.head(probe_url, timeout=(7, 7)) if r.status_code < 400 or r.status_code == 403: yield (name, url) else: @@ -195,19 +219,23 @@ def combination_os_comp(arch: str): for arch in arch_list: dest_dirs = [] conf = tempfile.NamedTemporaryFile("w", suffix=".conf") - conf.write(''' + conf.write( + """ [main] keepcache=0 -''') +""" + ) for name, url in combination_os_comp(arch): - conf.write(f''' + conf.write( + f""" [{name}] name={name} baseurl={url} repo_gpgcheck=0 gpgcheck=0 enabled=1 -''') +""" + ) dst = (args.working_dir / name).absolute() dst.mkdir(parents=True, exist_ok=True) dest_dirs.append(dst) @@ -217,13 +245,18 @@ def combination_os_comp(arch: str): if len(dest_dirs) == 0: print("Nothing to sync", flush=True) - failed.append(('', arch)) + failed.append(("", arch)) continue cmd_args = [ - "dnf", "reposync", - "-c", conf.name, - "--delete", "-p", str(args.working_dir.absolute())] + "dnf", + "reposync", + "-c", + conf.name, + "--delete", + "-p", + str(args.working_dir.absolute()), + ] if args.pass_arch_to_reposync: cmd_args += ["--arch", arch] print(f"Launching dnf reposync with command: {cmd_args}", flush=True) @@ -237,7 +270,16 @@ def combination_os_comp(arch: str): if args.download_repodata: download_repodata(url, path) else: - cmd_args = ["createrepo_c", "--update", "-v", "-c", cache_dir, "-o", str(path), str(path)] + cmd_args = [ + "createrepo_c", + "--update", + "-v", + "-c", + cache_dir, + "-o", + str(path), + str(path), + ] print(f"Launching createrepo with command: {cmd_args}", flush=True) ret = sp.run(cmd_args) calc_repo_size(path) @@ -250,5 +292,6 @@ def combination_os_comp(arch: str): total_size = sum([r[0] for r in REPO_STAT.values()]) fd.write(f"+{total_size}") + if __name__ == "__main__": main()