Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tools: add get_apps_repo.py, a lib that allows to pass --apps-repo or --apps-dir to tools #2510

Merged
merged 16 commits into from
Sep 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ jobs:
python3 -c "import toml; toml.load(open('apps.toml'))"
- name: Check all working apps have consistent app id / app url and categories
run: |
./tools/catalog_linter.py
./tools/catalog_linter.py --apps-dir .
- name: Check the generation of the app catalog
run: |
./tools/list_builder.py
8 changes: 8 additions & 0 deletions maintenance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,17 @@ function git_pull_and_update_cron_and_restart_services_if_needed()
systemctl --quiet is-active webhooks || sendxmpppy "[autoreadme] Uhoh, failed to (re)start the autoreadme service?"
}

# Refresh the local per-app git caches before any job that reads them:
#   -d  prune cache directories for apps no longer in the catalog
#   -l  use the current checkout (.) as the apps repository
#   -c  store the caches under .apps_caches
#   -j20  run 20 clone/update workers in parallel
function update_app_cache()
{
    ./tools/app_caches.py -d -l . -c .apps_caches -j20
}

# Pull the latest tooling, refresh the app caches, then regenerate the
# application catalog with list_builder.py; failures are reported via sendxmpppy.
function rebuild_catalog()
{
    log=$workdir/app_list_auto_update.log
    date >> $log
    git_pull_and_update_cron_and_restart_services_if_needed
    update_app_cache
    ./tools/list_builder.py &>> $log || sendxmpppy "[listbuilder] Rebuilding the application list failed miserably"
}

Expand All @@ -97,13 +103,15 @@ function autoupdate_app_sources()
log=$workdir/app_sources_auto_update.log
date >> $log
git_pull_and_update_cron_and_restart_services_if_needed
update_app_cache
tools/autoupdate_app_sources/venv/bin/python3 tools/autoupdate_app_sources/autoupdate_app_sources.py \
--latest-commit-weekly --edit --commit --pr --paste -j1 \
&> $log || sendxmpppy "[appsourcesautoupdate] App sources auto-update failed miserably"
}

function update_app_levels()
{
update_app_cache
pushd tools/update_app_levels >/dev/null
python3 update_app_levels.py
popd >/dev/null
Expand Down
178 changes: 87 additions & 91 deletions tools/app_caches.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,120 +10,112 @@

import tqdm

from appslib.utils import (
REPO_APPS_ROOT, # pylint: disable=import-error
get_catalog,
git_repo_age,
)
from git import Repo
from git.repo.fun import is_git_dir

from appslib.utils import get_catalog
import appslib.get_apps_repo as get_apps_repo

APPS_CACHE_DIR = REPO_APPS_ROOT / ".apps_cache"

class AppDir:
    """A single app's git checkout inside the apps cache directory."""

    def __init__(self, name: str, path: Path) -> None:
        # name: the app id, used for log messages
        # path: where this app's git clone lives on disk
        self.name = name
        self.path = path

def app_cache_folder(app: str) -> Path:
return APPS_CACHE_DIR / app
    def ensure(
        self, remote: str, branch: str, url_ssh: bool, all_branches: bool
    ) -> None:
        """Make sure the local clone exists and is up to date.

        Clones when self.path is not a git directory yet, updates otherwise.
        When url_ssh is True, a github.com https remote is rewritten to its
        ssh form before use.
        """
        # Patch url for ssh clone
        if url_ssh:
            remote = remote.replace("https://github.com/", "git@github.com:")

        # Update in place when a .git dir already exists, otherwise clone.
        op = self._update if is_git_dir(self.path / ".git") else self._clone
        op(remote, all_branches, branch)

def app_cache_clone(
app: str, infos: dict[str, str], all_branches: bool = False
) -> None:
logging.info("Cloning %s...", app)
git_depths = {
"notworking": 5,
"inprogress": 20,
"default": 40,
}
if app_cache_folder(app).exists():
shutil.rmtree(app_cache_folder(app))
Repo.clone_from(
infos["url"],
to_path=app_cache_folder(app),
depth=git_depths.get(infos["state"], git_depths["default"]),
single_branch=not all_branches,
branch=infos.get("branch", "master"),
)


def app_cache_clone_or_update(
app: str,
infos: dict[str, str],
ssh_clone: bool = False,
fetch_all_branches: bool = False,
) -> None:
app_path = app_cache_folder(app)

# Patch url for ssh clone
if ssh_clone:
infos["url"] = infos["url"].replace("https://github.com/", "git@github.com:")

# Don't refresh if already refreshed during last hour
age = git_repo_age(app_path)
if age is False:
app_cache_clone(app, infos, fetch_all_branches)
return

# if age < 3600:
# logging.info(f"Skipping {app}, it's been updated recently.")
# return

logging.info("Updating %s...", app)
repo = Repo(app_path)
repo.remote("origin").set_url(infos["url"])

branch = infos.get("branch", "master")
if fetch_all_branches:
repo.git.remote("set-branches", "origin", "*")
repo.remote("origin").fetch()
repo.remote("origin").pull()
else:
if repo.active_branch != branch:
all_branches = [str(b) for b in repo.branches]
if branch in all_branches:
repo.git.checkout(branch, "--force")
def cleanup(self) -> None:
logging.warning(f"Cleaning up {self.path}...")
if self.path.exists():
if self.path.is_dir():
shutil.rmtree(self.path)
else:
repo.git.remote("set-branches", "--add", "origin", branch)
repo.remote("origin").fetch(f"{branch}:{branch}")

repo.remote("origin").fetch(refspec=branch, force=True)
repo.git.reset("--hard", f"origin/{branch}")

self.path.unlink()

    def _clone(self, remote: str, all_branches: bool, branch: str) -> None:
        """Create a fresh shallow clone of *remote* at self.path."""
        logging.info("Cloning %s...", self.name)

        # Wipe any stale leftover (non-git dir, broken clone) before cloning.
        if self.path.exists():
            self.cleanup()
        Repo.clone_from(
            remote,
            to_path=self.path,
            depth=40,  # shallow clone: only the last 40 commits of history
            single_branch=not all_branches,
            branch=branch,
        )

def __app_cache_clone_or_update_mapped(data):
name, info, ssh_clone, all_branches = data
    def _update(self, remote: str, all_branches: bool, branch: str) -> None:
        """Fetch and hard-reset the existing clone onto origin/<branch>."""
        logging.info("Updating %s...", self.name)
        repo = Repo(self.path)
        # The remote url may have changed (e.g. https <-> ssh): always re-set it.
        repo.remote("origin").set_url(remote)

        if all_branches:
            # Track every remote branch, then fetch and pull them all.
            repo.git.remote("set-branches", "origin", "*")
            repo.remote("origin").fetch()
            repo.remote("origin").pull()
        else:
            # NOTE(review): active_branch is a GitPython Head object compared
            # against a str; this inequality looks like it is always True —
            # confirm the intended comparison.
            if repo.active_branch != branch:
                repo_branches = [str(b) for b in repo.heads]
                if branch in repo_branches:
                    repo.git.checkout(branch, "--force")
                else:
                    # Start tracking the missing branch and fetch it locally
                    # before it can be checked out.
                    repo.git.remote("set-branches", "--add", "origin", branch)
                    repo.remote("origin").fetch(f"{branch}:{branch}")

            # Discard any local changes so the cache exactly mirrors origin.
            repo.remote("origin").fetch(refspec=branch, force=True)
            repo.git.reset("--hard", f"origin/{branch}")


def __appdir_ensure_mapped(data):
name, path, url, branch, url_ssh, all_branches = data
try:
app_cache_clone_or_update(name, info, ssh_clone, all_branches)
AppDir(name, path).ensure(url, branch, url_ssh, all_branches)
except Exception as err:
logging.error("[App caches] Error while updating %s: %s", name, err)


def apps_cache_update_all(
cache_path: Path,
apps: dict[str, dict[str, Any]],
parallel: int = 8,
ssh_clone: bool = False,
url_ssh: bool = False,
all_branches: bool = False,
) -> None:
with Pool(processes=parallel) as pool:
tasks = pool.imap_unordered(
__app_cache_clone_or_update_mapped,
zip(apps.keys(), apps.values(), repeat(ssh_clone), repeat(all_branches)),
args = (
(
app,
cache_path / app,
info["url"],
info.get("branch", "master"),
url_ssh,
all_branches,
)
for app, info in apps.items()
)
with Pool(processes=parallel) as pool:
tasks = pool.imap_unordered(__appdir_ensure_mapped, args)
for _ in tqdm.tqdm(tasks, total=len(apps.keys()), ascii=" ·#"):
pass


def apps_cache_cleanup(apps: dict[str, dict[str, Any]]) -> None:
for element in APPS_CACHE_DIR.iterdir():
def apps_cache_cleanup(cache_path: Path, apps: dict[str, dict[str, Any]]) -> None:
for element in cache_path.iterdir():
if element.name not in apps.keys():
logging.warning(f"Removing {element}...")
if element.is_dir():
shutil.rmtree(element)
else:
element.unlink()
AppDir("", element).cleanup()


def __run_for_catalog():
parser = argparse.ArgumentParser()
get_apps_repo.add_args(parser)
parser.add_argument("-v", "--verbose", action="store_true")
parser.add_argument("-j", "--processes", type=int, default=8)
parser.add_argument(
Expand All @@ -141,24 +133,28 @@ def __run_for_catalog():
help="Download all branches from repo",
)
parser.add_argument(
"-c",
"--cleanup",
"-d",
"--delete-missing",
action="store_true",
default=False,
help="Remove unknown directories from the app cache",
)
args = parser.parse_args()

if args.verbose:
logging.getLogger().setLevel(logging.INFO)

APPS_CACHE_DIR.mkdir(exist_ok=True, parents=True)
cache_path = get_apps_repo.cache_path(args)
cache_path.mkdir(exist_ok=True, parents=True)

if args.delete_missing:
apps_cache_cleanup(cache_path, get_catalog())

if args.cleanup:
apps_cache_cleanup(get_catalog())
apps_cache_update_all(
cache_path,
get_catalog(),
parallel=args.processes,
ssh_clone=args.ssh,
url_ssh=args.ssh,
all_branches=args.all_branches,
)

Expand Down
92 changes: 92 additions & 0 deletions tools/appslib/get_apps_repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
#!/usr/bin/env python3

import os
import argparse
import tempfile
import logging
from pathlib import Path
from typing import Optional
from git import Repo
from .utils import set_apps_path


# Remote used by default when cloning the apps repository (--apps-repo).
DEFAULT_GIT_REPO = "https://github.com/YunoHost/apps"

# This provides a reference to the tempfile, thus keeping it alive until sys.exit
APPS_REPO_TMPDIR: Optional[tempfile.TemporaryDirectory] = None

# This is the actual value returned by from_args()
APPS_REPO_PATH: Optional[Path] = None

# Memoized result of cache_path()
APPS_CACHE_PATH: Optional[Path] = None


def add_args(parser: argparse.ArgumentParser, allow_temp: bool = True) -> None:
    """Register the apps-repository options on *parser*.

    Adds a mutually exclusive -l/--apps-dir vs -r/--apps-repo pair (the
    latter only when allow_temp is True) plus -c/--apps-cache.  The default
    for --apps-dir comes from the YNH_APPS_DIR environment variable when set.
    """
    apps_dir_env = os.environ.get("YNH_APPS_DIR")
    default_apps_dir: Optional[Path] = None
    if apps_dir_env is not None:
        default_apps_dir = Path(apps_dir_env)

    group = parser.add_mutually_exclusive_group(required=False)
    group.add_argument(
        "-l",
        "--apps-dir",
        type=Path,
        default=default_apps_dir,
        help="Path to a local 'apps' repository",
    )
    if allow_temp:
        # Only offered when the caller is fine with a temporary clone.
        group.add_argument(
            "-r",
            "--apps-repo",
            type=str,
            default=DEFAULT_GIT_REPO,
            help="Git url to clone the remote 'apps' repository",
        )
    parser.add_argument(
        "-c",
        "--apps-cache",
        type=Path,
        help="Path to the apps cache directory (default=<apps repo>/.apps_cache)",
    )


def from_args(args: Optional[argparse.Namespace]) -> Path:
    """Return the path to a local checkout of the 'apps' repository.

    Resolution order:
      1. a previously computed value, memoized in APPS_REPO_PATH;
      2. args.apps_dir (-l/--apps-dir or the YNH_APPS_DIR env default);
      3. args.apps_repo (-r/--apps-repo): clone into a temporary directory
         that is kept alive until interpreter exit.

    Also registers the resolved path with appslib.utils.set_apps_path().

    Raises:
        RuntimeError: when neither --apps-dir nor --apps-repo is available.
    """
    global APPS_REPO_PATH
    global APPS_REPO_TMPDIR

    # Memoized: every later call reuses the first resolution.
    if APPS_REPO_PATH is not None:
        return APPS_REPO_PATH

    assert args is not None
    # add_args() may have been called with allow_temp=False, in which case
    # the 'apps_repo' attribute does not exist at all: use getattr() so we
    # reach the explicit RuntimeError below instead of an AttributeError.
    apps_repo_url = getattr(args, "apps_repo", None)
    if args.apps_dir is not None:
        APPS_REPO_PATH = args.apps_dir
    elif apps_repo_url is not None:
        # Keep a module-level reference to the TemporaryDirectory so it is
        # not garbage-collected (and thus deleted) before the program exits.
        APPS_REPO_TMPDIR = tempfile.TemporaryDirectory(prefix="yunohost_apps_")
        APPS_REPO_PATH = Path(APPS_REPO_TMPDIR.name)
        logging.info("Cloning the 'apps' repository...")
        repo = Repo.clone_from(apps_repo_url, to_path=APPS_REPO_PATH)
        assert repo.working_tree_dir is not None
    else:
        raise RuntimeError("You need to pass either --apps-repo or --apps-dir!")

    assert APPS_REPO_PATH is not None
    set_apps_path(APPS_REPO_PATH)
    return APPS_REPO_PATH


def cache_path(args: Optional[argparse.Namespace]) -> Path:
    """Return the directory where the per-app caches are stored.

    Uses args.apps_cache (-c/--apps-cache) when given; otherwise defaults to
    <apps repo>/.apps_cache, resolving the apps repository via from_args()
    if that has not happened yet.  The result is memoized in the module
    global APPS_CACHE_PATH.
    """
    global APPS_CACHE_PATH

    # Already resolved once: reuse it.
    if APPS_CACHE_PATH is not None:
        return APPS_CACHE_PATH

    assert args is not None
    explicit_cache = args.apps_cache
    if explicit_cache is not None:
        APPS_CACHE_PATH = explicit_cache
    else:
        # Derive the default from the apps repository location, resolving
        # the repository first if nothing has requested it yet.
        if APPS_REPO_PATH is None:
            from_args(args)
        assert APPS_REPO_PATH is not None
        APPS_CACHE_PATH = APPS_REPO_PATH / ".apps_cache"

    assert APPS_CACHE_PATH is not None
    return APPS_CACHE_PATH
Loading