From bf49ab0c868582c1d47fd72f250d136184682e08 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Mon, 26 Feb 2024 10:24:07 +0100 Subject: [PATCH 1/4] Add a runonce argument in watcher to run only once - typically for testing --- watcher/watcher.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/watcher/watcher.py b/watcher/watcher.py index 57eb59c4..87f0d467 100644 --- a/watcher/watcher.py +++ b/watcher/watcher.py @@ -506,7 +506,7 @@ def run(self): checked_on = datetime.datetime.now() self.check_and_go() - while self.running: + while self.running and not self.runonce: if datetime.datetime.now() > checked_on + self.duration: checked_on = datetime.datetime.now() self.check_and_go() @@ -584,6 +584,13 @@ def entrypoint(): "--debug", help="Enable verbose output", action="store_true", default=False ) + parser.add_argument( + "--runonce", + help="Run only one check and stops", + action="store_true", + default=False, + ) + parser.add_argument( "--version", help="Display version and exit", From 573c5a5e14803c67470a300bff0e957ccc9b31fe Mon Sep 17 00:00:00 2001 From: benoit74 Date: Mon, 26 Feb 2024 10:25:39 +0100 Subject: [PATCH 2/4] Add CONTRIBUTING.md for watcher component --- watcher/CONTRIBUTING.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 watcher/CONTRIBUTING.md diff --git a/watcher/CONTRIBUTING.md b/watcher/CONTRIBUTING.md new file mode 100644 index 00000000..a197bf92 --- /dev/null +++ b/watcher/CONTRIBUTING.md @@ -0,0 +1,27 @@ +# Contributing + +In order to test this component, you need: +- a test Zimfarm instance with a username + password +- a test S3 bucket (compliance must NOT be activated on this bucket) +- credentials to access this bucket (keyId and secretAccessKey suggested below) +- Docker + +Rebuild the Docker image: + +``` +docker build -t local-zf-watcher . +``` + +Export the secret `S3_URL` as an environment variable. Note that the S3 URL starts with `https`. 
+ + +On Bash/Zsh shells (replace `<s3_host>`, `<key_id>`, `<secret_access_key>` and `<bucket_name>` with proper values): + +``` + export S3_URL="https://<s3_host>/?keyId=<key_id>&secretAccessKey=<secret_access_key>&bucketName=<bucket_name>" +``` + +Run a test (here my zimfarm is running in docker on a container `backend` in network `zimfarm_default`, adapt command to your local setup): +``` +docker run -it --rm -e ZIMFARM_API_URL=http://backend:8000/v1 -e S3_URL=$S3_URL --network zimfarm_default local-zf-watcher watcher --zimfarm-username admin --zimfarm-password admin --only tezos.stackexchange.com --runonce +``` \ No newline at end of file From e7bd0bdc2fa0e9db3184e23b4faab2764d738855 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Thu, 22 Feb 2024 17:41:27 +0100 Subject: [PATCH 3/4] Upgrade watcher dependencies and pin Python version --- watcher/Dockerfile | 2 +- watcher/requirements.txt | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/watcher/Dockerfile b/watcher/Dockerfile index 51c0d6cf..c6843044 100644 --- a/watcher/Dockerfile +++ b/watcher/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-alpine +FROM python:3.12-alpine LABEL zimfarm=true LABEL org.opencontainers.image.source https://github.com/openzim/zimfarm diff --git a/watcher/requirements.txt b/watcher/requirements.txt index d82f4955..b5a7deb6 100644 --- a/watcher/requirements.txt +++ b/watcher/requirements.txt @@ -1,6 +1,6 @@ pif==0.8.2 -requests>=2.26,<3.0 -humanfriendly>=9.2,<10.0 -PyJWT>=2.4.0,<3.0 -kiwixstorage>=0.8.2,<0.9 -xml-to-dict>=0.1.6,<0.2 +requests==2.31.0 +humanfriendly==10.0 +PyJWT==2.8.0 +kiwixstorage==0.8.3 +xml-to-dict==0.1.6 From 2864f402102dd02ad419e42d04f8537c5cbd2b83 Mon Sep 17 00:00:00 2001 From: benoit74 Date: Thu, 22 Feb 2024 17:42:37 +0100 Subject: [PATCH 4/4] Simplify watcher logic around number of threads - Docker detection not working anymore --- watcher/watcher.py | 23 ++--------------------- 1 file changed, 2 insertions(+), 21 deletions(-) diff --git a/watcher/watcher.py b/watcher/watcher.py index 87f0d467..a0085539 100644 --- 
a/watcher/watcher.py +++ b/watcher/watcher.py @@ -26,7 +26,6 @@ import datetime import json import logging -import multiprocessing import os import pathlib import re @@ -64,21 +63,6 @@ logging.getLogger(logger_name).setLevel(logging.WARNING) -def is_running_inside_container(): - """whether running inside a Docker container""" - fpath = pathlib.Path("/proc/self/cgroup") - if not fpath.exists(): - return False - try: - with open(fpath, "r") as fh: - for line in fh.readlines(): - if line.strip().rsplit(":", 1)[-1] != "/": - return True - finally: - pass - return False - - def get_version_for(url): """casted datetime of the Last-Modified header for an URL""" with requests.head(url, allow_redirects=True) as resp: @@ -559,13 +543,10 @@ def entrypoint(): ) parser.add_argument( "--threads", - help="How many threads to run to parallelize download/upload? " - "Defaults to 1 inside Docker as we can't guess available CPUs", + help="How many threads to run to parallelize download/upload? Defaults to 1", dest="nb_threads", type=int, - default=( - 1 if is_running_inside_container() else multiprocessing.cpu_count() - 1 or 1 - ), + default=1, ) parser.add_argument(