From 32df24011566eeb723db700a67655aab20cb405a Mon Sep 17 00:00:00 2001 From: benoit74 Date: Tue, 24 Sep 2024 09:58:07 +0000 Subject: [PATCH] Add Devdocs offliner, category and warehouse path --- dev/receiver/create-warehouse-paths.sh | 2 + dispatcher/backend/docs/openapi_v1.yaml | 1 + dispatcher/backend/src/common/enum.py | 17 +- .../backend/src/common/schemas/models.py | 2 + .../src/common/schemas/offliners/__init__.py | 2 + .../src/common/schemas/offliners/devdocs.py | 162 ++++++++++++++++++ dispatcher/backend/src/utils/offliners.py | 1 + dispatcher/frontend-ui/src/constants.js | 6 +- workers/app/common/constants.py | 4 +- workers/contrib/zimfarm.config.example | 3 +- workers/contrib/zimfarm.sh | 1 + 11 files changed, 195 insertions(+), 6 deletions(-) create mode 100644 dispatcher/backend/src/common/schemas/offliners/devdocs.py diff --git a/dev/receiver/create-warehouse-paths.sh b/dev/receiver/create-warehouse-paths.sh index df44a47f1..c2c915599 100755 --- a/dev/receiver/create-warehouse-paths.sh +++ b/dev/receiver/create-warehouse-paths.sh @@ -6,6 +6,7 @@ mkdir -p \ /jail/zim/freecodecamp \ /jail/zim/gutenberg \ /jail/zim/ifixit \ + /jail/zim/devdocs \ /jail/zim/mooc \ /jail/zim/other \ /jail/zim/phet \ @@ -28,6 +29,7 @@ chmod 777 \ /jail/zim/freecodecamp \ /jail/zim/gutenberg \ /jail/zim/ifixit \ + /jail/zim/devdocs \ /jail/zim/mooc \ /jail/zim/other \ /jail/zim/phet \ diff --git a/dispatcher/backend/docs/openapi_v1.yaml b/dispatcher/backend/docs/openapi_v1.yaml index 335316ec7..e3192dd47 100644 --- a/dispatcher/backend/docs/openapi_v1.yaml +++ b/dispatcher/backend/docs/openapi_v1.yaml @@ -1915,6 +1915,7 @@ components: - wikihow - zimit - ifixit + - devdocs example: - mwoffliner - sotoki diff --git a/dispatcher/backend/src/common/enum.py b/dispatcher/backend/src/common/enum.py index a2ece3acf..f0a5aab34 100644 --- a/dispatcher/backend/src/common/enum.py +++ b/dispatcher/backend/src/common/enum.py @@ -113,6 +113,7 @@ class ScheduleCategory: wiktionary = 
"wiktionary" ifixit = "ifixit" freecodecamp = "freecodecamp" + devdocs = "devdocs" @classmethod def all(cls): @@ -137,6 +138,7 @@ def all(cls): cls.wiktionary, cls.ifixit, cls.freecodecamp, + cls.devdocs, ] @classmethod @@ -168,6 +170,7 @@ class DockerImageName: wikihow = "openzim/wikihow" ifixit = "openzim/ifixit" freecodecamp = "openzim/freecodecamp" + devdocs = "openzim/devdocs" @classmethod def all(cls) -> set: @@ -185,6 +188,7 @@ def all(cls) -> set: cls.wikihow, cls.ifixit, cls.freecodecamp, + cls.devdocs, } @@ -202,6 +206,7 @@ class Offliner: wikihow = "wikihow" ifixit = "ifixit" freecodecamp = "freecodecamp" + devdocs = "devdocs" @classmethod def all(cls): @@ -219,6 +224,7 @@ def all(cls): cls.wikihow, cls.ifixit, cls.freecodecamp, + cls.devdocs, ] @classmethod @@ -243,6 +249,7 @@ def get_image_name(cls, offliner): cls.wikihow: DockerImageName.wikihow, cls.ifixit: DockerImageName.ifixit, cls.freecodecamp: DockerImageName.freecodecamp, + cls.devdocs: DockerImageName.devdocs, }.get(offliner, "-") @@ -264,10 +271,18 @@ class Platform: wikihow = "wikihow" ifixit = "ifixit" ted = "ted" + devdocs = "devdocs" @classmethod def all(cls) -> str: - return [cls.wikimedia, cls.youtube, cls.wikihow, cls.ifixit, cls.ted] + return [ + cls.wikimedia, + cls.youtube, + cls.wikihow, + cls.ifixit, + cls.ted, + cls.devdocs, + ] @classmethod def get_max_per_worker_tasks_for(cls, platform) -> int: diff --git a/dispatcher/backend/src/common/schemas/models.py b/dispatcher/backend/src/common/schemas/models.py index e71c885f6..8ff3eab8e 100644 --- a/dispatcher/backend/src/common/schemas/models.py +++ b/dispatcher/backend/src/common/schemas/models.py @@ -21,6 +21,7 @@ validate_warehouse_path, ) from common.schemas.offliners import ( + DevDocsFlagsSchema, FreeCodeCampFlagsSchema, GutenbergFlagsSchema, IFixitFlagsSchema, @@ -101,6 +102,7 @@ def get_offliner_schema(offliner): Offliner.wikihow: WikihowFlagsSchema, Offliner.ifixit: IFixitFlagsSchema, Offliner.freecodecamp: 
FreeCodeCampFlagsSchema, + Offliner.devdocs: DevDocsFlagsSchema, }.get(offliner, Schema) @validates_schema diff --git a/dispatcher/backend/src/common/schemas/offliners/__init__.py b/dispatcher/backend/src/common/schemas/offliners/__init__.py index f7eebfbec..07dab227e 100644 --- a/dispatcher/backend/src/common/schemas/offliners/__init__.py +++ b/dispatcher/backend/src/common/schemas/offliners/__init__.py @@ -1,4 +1,5 @@ from common.schemas import SerializableSchema +from common.schemas.offliners.devdocs import DevDocsFlagsSchema from common.schemas.offliners.freecodecamp import FreeCodeCampFlagsSchema from common.schemas.offliners.gutenberg import GutenbergFlagsSchema from common.schemas.offliners.ifixit import IFixitFlagsSchema @@ -16,6 +17,7 @@ from common.schemas.offliners.zimit import ZimitFlagsSchema, ZimitFlagsSchemaRelaxed __all__ = ( + "DevDocsFlagsSchema", "FreeCodeCampFlagsSchema", "GutenbergFlagsSchema", "IFixitFlagsSchema", diff --git a/dispatcher/backend/src/common/schemas/offliners/devdocs.py b/dispatcher/backend/src/common/schemas/offliners/devdocs.py new file mode 100644 index 000000000..743dcf0a1 --- /dev/null +++ b/dispatcher/backend/src/common/schemas/offliners/devdocs.py @@ -0,0 +1,162 @@ +from marshmallow import fields + +from common.schemas import SerializableSchema, String +from common.schemas.fields import ( + validate_output, + validate_zim_description, + validate_zim_long_description, +) + + +class DevDocsFlagsSchema(SerializableSchema): + class Meta: + ordered = True + + all_flag = fields.Boolean( + truthy=[True], + falsy=[False], + metadata={ + "label": "All", + "description": "Fetch all Devdocs resources, and produce one ZIM " + "per resource.", + }, + data_key="all", + ) + + slug = String( # should be ListOfString but not yet supported by Zimfarm + metadata={ + "label": "Slug", + "description": "Fetch the provided Devdocs resource. " + "Slugs are the first path entry in the Devdocs URL. 
" + "For example, the slug for: `https://devdocs.io/gcc~12/` is `gcc~12`.", + }, + ) + + first = fields.Integer( + metadata={ + "label": "Number of first items", + "description": "Fetch only the first N items per slug as shown " + "in the DevDocs UI. Do not set to fetch all items.", + }, + ) + + skip_slug_regex = String( + metadata={ + "label": "Skip slugs regex", + "description": "Skips slugs matching the given regular expression. " + "Do not set to fetch all slugs", + }, + data_key="skip-slug-regex", + ) + + file_name_format = String( + metadata={ + "label": "ZIM filename", + "description": "ZIM filename. Do not input trailing `.zim`, it " + "will be automatically added. You can use placeholders, see " + "https://github.com/openzim/devdocs/blob/main/README.md. Defaults " + "to devdocs.io_en_{clean_slug}_{period}", + }, + data_key="file-name-format", + ) + + name_format = String( + metadata={ + "label": "ZIM name", + "description": "ZIM name. You can use placeholders, see " + "https://github.com/openzim/devdocs/blob/main/README.md. Defaults " + "to devdocs.io_en_{clean_slug}", + }, + data_key="name-format", + ) + + title_format = String( + metadata={ + "label": "ZIM title", + "description": "ZIM title. You can use placeholders, see " + "https://github.com/openzim/devdocs/blob/main/README.md. Defaults " + "to `{full_name} Docs`", + }, + data_key="title-format", + ) + + description_format = String( + metadata={ + "label": "ZIM description", + "description": "ZIM description. You can use placeholders, see " + "https://github.com/openzim/devdocs/blob/main/README.md. Defaults " + "to `{full_name} docs by DevDocs`", + }, + data_key="description-format", + validate=validate_zim_description, + ) + + long_description_format = String( + metadata={ + "label": "ZIM long description", + "description": "ZIM long description. You can use placeholders, see " + "https://github.com/openzim/devdocs/blob/main/README.md. 
Defaults " + "to `{full_name} docs by DevDocs`", + }, + data_key="long-description-format", + validate=validate_zim_long_description, + ) + + tags = String( + metadata={ + "label": "ZIM Tags", + "description": "List of semi-colon-separated Tags for the ZIM file. " + "You can use placeholders, see " + "https://github.com/openzim/devdocs/blob/main/README.md. Defaults to " + "`devdocs;{slug_without_version}`", + } + ) + + creator = String( + metadata={ + "label": "Creator", + "description": "Name of content creator. “DevDocs” otherwise", + }, + ) + + publisher = String( + metadata={ + "label": "Publisher", + "description": "Custom publisher name (ZIM metadata). “openZIM” otherwise", + }, + ) + + output = String( + metadata={ + "label": "Output folder", + "placeholder": "/output", + "description": "Output folder for ZIM file(s). Leave it as `/output`", + }, + load_default="/output", + dump_default="/output", + validate=validate_output, + ) + + debug = fields.Boolean( + truthy=[True], + falsy=[False], + metadata={"label": "Debug", "description": "Enable verbose output"}, + ) + + devdocs_frontend_url = String( + metadata={ + "label": "DevDocs frontend URL", + "description": "Scheme and hostname for the devdocs frontend. " + "Defaults to https://devdocs.io", + }, + data_key="devdocs-frontend-url", + ) + + devdocs_documents_url = String( + metadata={ + "label": "DevDocs documents URL", + "description": "Scheme and hostname for the devdocs documents server. 
" 
+ "Defaults to https://documents.devdocs.io", + }, + data_key="devdocs-documents-url", + ) diff --git a/dispatcher/backend/src/utils/offliners.py b/dispatcher/backend/src/utils/offliners.py index dbea6a95a..524709750 100644 --- a/dispatcher/backend/src/utils/offliners.py +++ b/dispatcher/backend/src/utils/offliners.py @@ -25,6 +25,7 @@ Offliner.nautilus: od("nautiluszim", True, False), Offliner.zimit: od("zimit", True, "statsFilename"), Offliner.kolibri: od("kolibri2zim", True, False), + Offliner.devdocs: od("devdocs2zim", True, False), } diff --git a/dispatcher/frontend-ui/src/constants.js b/dispatcher/frontend-ui/src/constants.js index ab21f7a58..f79b94e13 100644 --- a/dispatcher/frontend-ui/src/constants.js +++ b/dispatcher/frontend-ui/src/constants.js @@ -336,17 +336,17 @@ export default { cancelable_statuses: cancelable_statuses, running_statuses: running_statuses, contact_email: "contact@kiwix.org", - categories: ["freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange", + categories: ["devdocs", "freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange", "ted", "openedx", "vikidia", "wikibooks", "wikihow", "wikinews", "wikipedia", "wikiquote", "wikisource", "wikispecies", "wikiversity", "wikivoyage", "wiktionary"], // list of categories for fileering - warehouse_paths: ["/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange", + warehouse_paths: ["/devdocs", "/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange", "/ted", "/mooc", "/videos", "/vikidia", "/wikibooks", "/wikihow", "/wikinews", "/wikipedia", "/wikiquote", "/wikisource", "/wikiversity", "/wikivoyage", "/wiktionary", "/zimit", "/.hidden/dev", "/.hidden/private", "/.hidden/endless", "/.hidden/bard", "/.hidden/bsf", "/.hidden/custom_apps"], - offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", 
"freecodecamp"], + offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", "freecodecamp", "devdocs"], periodicities: ["manually", "monthly", "quarterly", "biannualy", "annually"], memory_values: [536870912, // 512MiB 1073741824, // 1GiB diff --git a/workers/app/common/constants.py b/workers/app/common/constants.py index 28805bfc6..0f43fe3e0 100644 --- a/workers/app/common/constants.py +++ b/workers/app/common/constants.py @@ -122,6 +122,7 @@ OFFLINER_WIKIHOW = "wikihow" OFFLINER_IFIXIT = "ifixit" OFFLINER_FREECODECAMP = "freecodecamp" +OFFLINER_DEVDOCS = "devdocs" ALL_OFFLINERS = [ OFFLINER_MWOFFLINER, @@ -137,6 +138,7 @@ OFFLINER_WIKIHOW, OFFLINER_IFIXIT, OFFLINER_FREECODECAMP, + OFFLINER_DEVDOCS, ] SUPPORTED_OFFLINERS = [ offliner @@ -152,7 +154,7 @@ OFFLINER_YOUTUBE, ] -ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted"] +ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted", "devdocs"] PLATFORMS_TASKS = {} for platform in ALL_PLATFORMS: name = f"PLATFORM_{platform}_MAX_TASKS" diff --git a/workers/contrib/zimfarm.config.example b/workers/contrib/zimfarm.config.example index dc507ff39..a7628f809 100644 --- a/workers/contrib/zimfarm.config.example +++ b/workers/contrib/zimfarm.config.example @@ -47,7 +47,7 @@ ZIMFARM_CPU="3" # Comma-separated list of offliners to run or `""` for all of them. If # you want to run `youtube` tasks, you need to be whitelisted, contact # us. 
-ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp" +ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp,devdocs" # Set to `"y"` to only run task specifically assigned to this worker # (`""` otherwise) @@ -66,4 +66,5 @@ DISABLE_IPV6="" # PLATFORM_youtube_MAX_TASKS=2 # PLATFORM_wikihow_MAX_TASKS=2 # PLATFORM_ifixit_MAX_TASKS=2 +# PLATFORM_devdocs_MAX_TASKS=2 # PLATFORM_ted_MAX_TASKS=2 diff --git a/workers/contrib/zimfarm.sh b/workers/contrib/zimfarm.sh index 1f9b1b243..8b64870e3 100755 --- a/workers/contrib/zimfarm.sh +++ b/workers/contrib/zimfarm.sh @@ -197,6 +197,7 @@ function restart() { --env PLATFORM_youtube_MAX_TASKS=$PLATFORM_youtube_MAX_TASKS \ --env PLATFORM_wikihow_MAX_TASKS=$PLATFORM_wikihow_MAX_TASKS \ --env PLATFORM_ifixit_MAX_TASKS=$PLATFORM_ifixit_MAX_TASKS \ + --env PLATFORM_devdocs_MAX_TASKS=$PLATFORM_devdocs_MAX_TASKS \ --env PLATFORM_ted_MAX_TASKS=$PLATFORM_ted_MAX_TASKS \ --env POLL_INTERVAL=$POLL_INTERVAL \ --env DNSCACHE_IMAGE=$DNSCACHE_IMAGE \