-
-
Notifications
You must be signed in to change notification settings - Fork 26
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Create a tool to automatically create and maintain Zimfarm recipes
- Loading branch information
Showing
13 changed files
with
1,146 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
## Recipes Auto | ||
|
||
This project is aimed at automatically creating and maintaining a group of Zimfarm recipes. | ||
|
||
The first use cases are StackExchange and TED recipes, for which we want one recipe per StackExchange domain / TED topic. We know others will come (we already have shamela.ws recipes waiting in the pipeline, libretexts.org would benefit from this as well, devdocs, ...). | ||
|
||
The goals of this project are: | ||
|
||
- easily create many recipes following the same "model", but with specificities for all of them | ||
- easily maintain these recipes over time | ||
- detect missing recipes and create them as needed (typically a new StackExchange website or a new TED topic appeared) | ||
- delete obsolete ones (typically a StackExchange website or TED topic is abandoned) | ||
- update parameters as needed (e.g. change all recipes periodicity, ...) | ||
- source data for these recipes from "source of truth", typically local / remote "data" files or APIs (potentially mixing data from multiple sources) | ||
- easy to configure when a new project arises, reusing building blocks | ||
- two modes: "dry-run" (to check what is going to happen) and "apply" (to really apply it) | ||
- easily handle recipe configuration exceptions (because there are always exceptions) | ||
- write everything in Python for easy maintainability | ||
|
||
Out of scope for now: | ||
|
||
- GUI | ||
- permission management | ||
|
||
The dry-run mode makes it possible to: | ||
|
||
- easily observe what is going to be applied without risking breaking the configuration | ||
- modify the configuration / code until the plan is accurate | ||
|
||
Sample usage: | ||
|
||
``` | ||
recipesauto --zimfarm-username benoit --zimfarm-password $ZIMFARM_PASS --values ted_optim_url=$TED_OPTIM_URL ted | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,213 @@ | ||
[build-system] | ||
requires = ["hatchling", "hatch-openzim==0.2.1"] | ||
build-backend = "hatchling.build" | ||
|
||
[project] | ||
name = "recipesauto" | ||
requires-python = ">=3.12,<3.13" | ||
description = "Automatically create and maintain a group of Zimfarm recipes" | ||
readme = "../README.md" | ||
dependencies = [ | ||
"requests==2.32.3", | ||
"types-requests==2.32.0.20240914", | ||
"PyYAML==6.0.2", | ||
] | ||
dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"] | ||
|
||
[tool.hatch.metadata.hooks.openzim-metadata] | ||
additional-keywords = ["zimfarm"] | ||
|
||
[tool.hatch.build.hooks.openzim-build] | ||
|
||
[project.optional-dependencies] | ||
scripts = ["invoke==2.2.0"] | ||
lint = ["black==24.8.0", "ruff==0.6.4"] | ||
check = ["pyright==1.1.380"] | ||
test = ["pytest==8.3.3", "coverage==7.6.1"] | ||
dev = [ | ||
"pre-commit==3.8.0", | ||
"debugpy==1.8.5", | ||
"recipesauto[scripts]", | ||
"recipesauto[lint]", | ||
"recipesauto[test]", | ||
"recipesauto[check]", | ||
"humanfriendly==10.0" | ||
] | ||
|
||
[project.scripts] | ||
recipesauto = "recipesauto.__main__:main" | ||
|
||
[tool.hatch.version] | ||
path = "src/recipesauto/__about__.py" | ||
|
||
[tool.hatch.build] | ||
exclude = ["/.github"] | ||
|
||
[tool.hatch.build.targets.wheel] | ||
packages = ["src/recipesauto"] | ||
|
||
[tool.hatch.envs.default] | ||
features = ["dev"] | ||
|
||
[tool.hatch.envs.test] | ||
features = ["scripts", "test"] | ||
|
||
[tool.hatch.envs.test.scripts] | ||
run = "inv test --args '{args}'" | ||
run-cov = "inv test-cov --args '{args}'" | ||
report-cov = "inv report-cov" | ||
coverage = "inv coverage --args '{args}'" | ||
html = "inv coverage --html --args '{args}'" | ||
|
||
[tool.hatch.envs.lint] | ||
template = "lint" | ||
python = "py312" | ||
skip-install = false | ||
features = ["scripts", "lint"] | ||
|
||
[tool.hatch.envs.lint.scripts] | ||
black = "inv lint-black --args '{args}'" | ||
ruff = "inv lint-ruff --args '{args}'" | ||
all = "inv lintall --args '{args}'" | ||
fix-black = "inv fix-black --args '{args}'" | ||
fix-ruff = "inv fix-ruff --args '{args}'" | ||
fixall = "inv fixall --args '{args}'" | ||
|
||
[tool.hatch.envs.check] | ||
features = ["scripts", "check"] | ||
|
||
[tool.hatch.envs.check.scripts] | ||
pyright = "inv check-pyright --args '{args}'" | ||
all = "inv checkall --args '{args}'" | ||
|
||
[tool.black] | ||
line-length = 88 | ||
target-version = ['py312'] | ||
exclude = "(src/recipesauto/templates/.*|.hatch/.*)" | ||
|
||
[tool.ruff] | ||
target-version = "py312" | ||
line-length = 88 | ||
src = ["src"] | ||
|
||
[tool.ruff.lint] | ||
select = [ | ||
"A", # flake8-builtins | ||
# "ANN", # flake8-annotations | ||
"ARG", # flake8-unused-arguments | ||
# "ASYNC", # flake8-async | ||
"B", # flake8-bugbear | ||
# "BLE", # flake8-blind-except | ||
"C4", # flake8-comprehensions | ||
"C90", # mccabe | ||
# "COM", # flake8-commas | ||
# "D", # pydocstyle | ||
# "DJ", # flake8-django | ||
"DTZ", # flake8-datetimez | ||
"E", # pycodestyle (default) | ||
"EM", # flake8-errmsg | ||
# "ERA", # eradicate | ||
# "EXE", # flake8-executable | ||
"F", # Pyflakes (default) | ||
# "FA", # flake8-future-annotations | ||
"FBT", # flake8-boolean-trap | ||
# "FLY", # flynt | ||
# "G", # flake8-logging-format | ||
"I", # isort | ||
"ICN", # flake8-import-conventions | ||
# "INP", # flake8-no-pep420 | ||
# "INT", # flake8-gettext | ||
"ISC", # flake8-implicit-str-concat | ||
"N", # pep8-naming | ||
# "NPY", # NumPy-specific rules | ||
# "PD", # pandas-vet | ||
# "PGH", # pygrep-hooks | ||
# "PIE", # flake8-pie | ||
# "PL", # Pylint | ||
"PLC", # Pylint: Convention | ||
"PLE", # Pylint: Error | ||
"PLR", # Pylint: Refactor | ||
"PLW", # Pylint: Warning | ||
# "PT", # flake8-pytest-style | ||
# "PTH", # flake8-use-pathlib | ||
# "PYI", # flake8-pyi | ||
"Q", # flake8-quotes | ||
# "RET", # flake8-return | ||
# "RSE", # flake8-raise | ||
"RUF", # Ruff-specific rules | ||
"S", # flake8-bandit | ||
# "SIM", # flake8-simplify | ||
# "SLF", # flake8-self | ||
"T10", # flake8-debugger | ||
"T20", # flake8-print | ||
# "TCH", # flake8-type-checking | ||
# "TD", # flake8-todos | ||
"TID", # flake8-tidy-imports | ||
# "TRY", # tryceratops | ||
"UP", # pyupgrade | ||
"W", # pycodestyle | ||
"YTT", # flake8-2020 | ||
] | ||
ignore = [ | ||
# Allow non-abstract empty methods in abstract base classes | ||
"B027", | ||
# Allow use of date.today | ||
"DTZ011", | ||
# Remove flake8-errmsg since we consider they bloat the code and provide limited value | ||
"EM", | ||
# Allow boolean positional values in function calls, like `dict.get(... True)` | ||
"FBT003", | ||
# Ignore checks for possible passwords | ||
"S105", | ||
"S106", | ||
"S107", | ||
# Ignore warnings on subprocess.run / popen | ||
"S603", | ||
# Ignore complexity | ||
"C901", | ||
"PLR0911", | ||
"PLR0912", | ||
"PLR0913", | ||
"PLR0915", | ||
] | ||
unfixable = [ | ||
# Don't touch unused imports | ||
"F401", | ||
] | ||
|
||
[tool.ruff.lint.isort] | ||
known-first-party = ["recipesauto"] | ||
|
||
[tool.ruff.lint.flake8-tidy-imports] | ||
ban-relative-imports = "all" | ||
|
||
[tool.ruff.lint.per-file-ignores] | ||
# Tests can use magic values, assertions, and relative imports | ||
"tests/**/*" = ["PLR2004", "S101", "TID252"] | ||
"tests-integration/**/*" = ["PLR2004", "S101", "TID252"] | ||
|
||
[tool.pytest.ini_options] | ||
minversion = "7.3" | ||
testpaths = ["tests"] | ||
pythonpath = [".", "src"] | ||
|
||
[tool.coverage.paths] | ||
great_project = ["src/recipesauto"] | ||
tests = ["tests"] | ||
|
||
[tool.coverage.run] | ||
source_pkgs = ["recipesauto"] | ||
branch = true | ||
parallel = true | ||
omit = ["src/recipesauto/__about__.py"] | ||
|
||
[tool.coverage.report] | ||
exclude_lines = ["no cov", "if __name__ == .__main__.:", "if TYPE_CHECKING:"] | ||
|
||
[tool.pyright] | ||
include = ["src", "tests", "tasks.py"] | ||
exclude = [".env/**", ".venv/**", "src/recipesauto/templates", ".hatch"] | ||
extraPaths = ["src"] | ||
pythonVersion = "3.12" | ||
typeCheckingMode = "basic" | ||
disableBytesTypePromotions = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
__version__ = "0.1.0-dev0" |
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import sys | ||
import tempfile | ||
|
||
from recipesauto.constants import logger | ||
from recipesauto.entrypoint import prepare_context | ||
|
||
|
||
def main():
    """Command-line entry point: prepare the context, then run the processor.

    The command-line arguments (program name excluded) are handed to
    ``prepare_context``; a ``Processor`` is then instantiated and run.

    Raises:
        SystemExit: re-raised unchanged when the called code exits by itself;
            raised with code 1 when any other exception escapes.
    """
    try:
        prepare_context(sys.argv[1:])

        # import this only once the Context has been initialized, so that it gets an
        # initialized context
        from recipesauto.processor import Processor

        Processor().run()

    except SystemExit as exc:
        # An exit code of None or 0 denotes a successful termination (for instance
        # an early exit requested by the user); only report real failures.
        if exc.code not in (None, 0):
            logger.error("Execution failed, exiting")
        raise
    except Exception as exc:
        # log the traceback first, then a one-line summary of the failure
        logger.exception(exc)
        logger.error(f"Execution failed with the following error: {exc}")
        raise SystemExit(1) from exc
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
import logging | ||
import pathlib | ||
|
||
from recipesauto.__about__ import __version__ | ||
|
||
NAME = "recipesauto" | ||
VERSION = __version__ | ||
ROOT_DIR = pathlib.Path(__file__).parent | ||
|
||
# logger to use everywhere (not part of Context class because we need it early, before | ||
# Context has been initialized) | ||
logging.basicConfig( | ||
level=logging.INFO, format="[%(asctime)s: %(levelname)s] %(message)s" | ||
) | ||
logger: logging.Logger = logging.getLogger(NAME) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import dataclasses
from pathlib import Path
from typing import ClassVar

from recipesauto.constants import ROOT_DIR
|
||
|
||
@dataclasses.dataclass(kw_only=True)
class Context:
    """Class holding every contextual / configuration bits which can be moved

    Used to easily pass information around in the scraper. A single shared instance
    is created (or refreshed) with `setup` and retrieved with `get`.
    """

    # singleton instance; declared as a ClassVar so that the dataclass machinery
    # does not turn it into a per-instance field (constructor argument, repr,
    # equality, `dataclasses.fields`)
    _instance: ClassVar["Context | None"] = None

    # push changes to Zimfarm
    push: bool = False

    # set of recipes to maintain
    set: str

    # URL to Zimfarm API
    zimfarm_api_url: str = "https://api.farm.openzim.org/v1"

    # Credentials to Zimfarm
    zimfarm_username: str
    zimfarm_password: str

    # timeout of HTTP calls
    http_timeout: int = 10

    # dict of values to use in configuration (typically to pass secrets)
    values: dict[str, str]

    # path to files with configuration override
    overrides: Path = Path(ROOT_DIR / "overrides.yaml")

    @classmethod
    def setup(cls, **kwargs) -> None:
        """Create the shared context, or refresh it if it already exists.

        Args:
            **kwargs: keyword arguments accepted by the dataclass constructor.
        """
        new_instance = cls(**kwargs)
        if cls._instance is None:
            cls._instance = new_instance
            return

        # replace values 'in-place' so that we do not change the Context object
        # which might be already imported in some modules
        for field in dataclasses.fields(new_instance):
            setattr(cls._instance, field.name, getattr(new_instance, field.name))

    @classmethod
    def get(cls) -> "Context":
        """Return the shared context.

        Raises:
            OSError: if `setup` has not been called yet.
        """
        if cls._instance is None:
            raise OSError("Uninitialized context")  # pragma: no cover
        return cls._instance
Oops, something went wrong.