Skip to content

Commit

Permalink
Add Devdocs offliner, category and warehouse path
Browse files Browse the repository at this point in the history
  • Loading branch information
benoit74 committed Sep 24, 2024
1 parent 72481c2 commit 32df240
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 6 deletions.
2 changes: 2 additions & 0 deletions dev/receiver/create-warehouse-paths.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ mkdir -p \
/jail/zim/freecodecamp \
/jail/zim/gutenberg \
/jail/zim/ifixit \
/jail/zim/devdocs \
/jail/zim/mooc \
/jail/zim/other \
/jail/zim/phet \
Expand All @@ -28,6 +29,7 @@ chmod 777 \
/jail/zim/freecodecamp \
/jail/zim/gutenberg \
/jail/zim/ifixit \
/jail/zim/devdocs \
/jail/zim/mooc \
/jail/zim/other \
/jail/zim/phet \
Expand Down
1 change: 1 addition & 0 deletions dispatcher/backend/docs/openapi_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1915,6 +1915,7 @@ components:
- wikihow
- zimit
- ifixit
- devdocs
example:
- mwoffliner
- sotoki
Expand Down
17 changes: 16 additions & 1 deletion dispatcher/backend/src/common/enum.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@ class ScheduleCategory:
wiktionary = "wiktionary"
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"

@classmethod
def all(cls):
Expand All @@ -137,6 +138,7 @@ def all(cls):
cls.wiktionary,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
]

@classmethod
Expand Down Expand Up @@ -168,6 +170,7 @@ class DockerImageName:
wikihow = "openzim/wikihow"
ifixit = "openzim/ifixit"
freecodecamp = "openzim/freecodecamp"
devdocs = "openzim/devdocs"

@classmethod
def all(cls) -> set:
Expand All @@ -185,6 +188,7 @@ def all(cls) -> set:
cls.wikihow,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
}


Expand All @@ -202,6 +206,7 @@ class Offliner:
wikihow = "wikihow"
ifixit = "ifixit"
freecodecamp = "freecodecamp"
devdocs = "devdocs"

@classmethod
def all(cls):
Expand All @@ -219,6 +224,7 @@ def all(cls):
cls.wikihow,
cls.ifixit,
cls.freecodecamp,
cls.devdocs,
]

@classmethod
Expand All @@ -243,6 +249,7 @@ def get_image_name(cls, offliner):
cls.wikihow: DockerImageName.wikihow,
cls.ifixit: DockerImageName.ifixit,
cls.freecodecamp: DockerImageName.freecodecamp,
cls.devdocs: DockerImageName.devdocs,
}.get(offliner, "-")


Expand All @@ -264,10 +271,18 @@ class Platform:
wikihow = "wikihow"
ifixit = "ifixit"
ted = "ted"
devdocs = "devdocs"

@classmethod
def all(cls) -> str:
return [cls.wikimedia, cls.youtube, cls.wikihow, cls.ifixit, cls.ted]
return [
cls.wikimedia,
cls.youtube,
cls.wikihow,
cls.ifixit,
cls.ted,
cls.devdocs,
]

@classmethod
def get_max_per_worker_tasks_for(cls, platform) -> int:
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
validate_warehouse_path,
)
from common.schemas.offliners import (
DevDocsFlagsSchema,
FreeCodeCampFlagsSchema,
GutenbergFlagsSchema,
IFixitFlagsSchema,
Expand Down Expand Up @@ -101,6 +102,7 @@ def get_offliner_schema(offliner):
Offliner.wikihow: WikihowFlagsSchema,
Offliner.ifixit: IFixitFlagsSchema,
Offliner.freecodecamp: FreeCodeCampFlagsSchema,
Offliner.devdocs: DevDocsFlagsSchema,
}.get(offliner, Schema)

@validates_schema
Expand Down
2 changes: 2 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from common.schemas import SerializableSchema
from common.schemas.offliners.devdocs import DevDocsFlagsSchema
from common.schemas.offliners.freecodecamp import FreeCodeCampFlagsSchema
from common.schemas.offliners.gutenberg import GutenbergFlagsSchema
from common.schemas.offliners.ifixit import IFixitFlagsSchema
Expand All @@ -16,6 +17,7 @@
from common.schemas.offliners.zimit import ZimitFlagsSchema, ZimitFlagsSchemaRelaxed

__all__ = (
"DevDocsFlagsSchema",
"FreeCodeCampFlagsSchema",
"GutenbergFlagsSchema",
"IFixitFlagsSchema",
Expand Down
162 changes: 162 additions & 0 deletions dispatcher/backend/src/common/schemas/offliners/devdocs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
from marshmallow import fields

from common.schemas import SerializableSchema, String
from common.schemas.fields import (
validate_output,
validate_zim_description,
validate_zim_long_description,
)


class DevDocsFlagsSchema(SerializableSchema):
    """Flags accepted for the DevDocs offliner.

    Each attribute declares one flag. ``data_key`` gives the external
    (dash-separated) flag name where it differs from the Python attribute
    name, and ``metadata`` carries the human-readable label and description.

    NOTE: descriptions are built from adjacent string literals, which Python
    concatenates with no separator; every fragment that is followed by
    another one must therefore end with an explicit space.
    """

    class Meta:
        # Keep the flags in declaration order.
        ordered = True

    # Only the literal booleans are accepted (no "yes"/"1"/... coercion).
    all_flag = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={
            "label": "All",
            "description": "Fetch all Devdocs resources, and produce one ZIM "
            "per resource.",
        },
        data_key="all",
    )

    slug = String(  # should be ListOfString but not yet supported by Zimfarm
        metadata={
            "label": "Slug",
            "description": "Fetch the provided Devdocs resource. "
            "Slugs are the first path entry in the Devdocs URL. "
            "For example, the slug for: `https://devdocs.io/gcc~12/` is `gcc~12`.",
        },
    )

    first = fields.Integer(
        metadata={
            "label": "Number of first items",
            "description": "Fetch only the first N items per slug as shown "
            "in the DevDocs UI. Do not set to fetch all items.",
        },
    )

    skip_slug_regex = String(
        metadata={
            "label": "Skip slugs regex",
            "description": "Skips slugs matching the given regular expression. "
            "Do not set to fetch all slugs",
        },
        data_key="skip-slug-regex",
    )

    file_name_format = String(
        metadata={
            "label": "ZIM filename",
            "description": "ZIM filename. Do not input trailing `.zim`, it "
            "will be automatically added. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to devdocs.io_en_{clean_slug}_{period}",
        },
        data_key="file-name-format",
    )

    name_format = String(
        metadata={
            "label": "ZIM name",
            "description": "ZIM name. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to devdocs.io_en_{clean_slug}",
        },
        data_key="name-format",
    )

    title_format = String(
        metadata={
            "label": "ZIM title",
            "description": "ZIM title. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} Docs`",
        },
        data_key="title-format",
    )

    description_format = String(
        metadata={
            "label": "ZIM description",
            "description": "ZIM description. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} docs by DevDocs`",
        },
        data_key="description-format",
        validate=validate_zim_description,
    )

    long_description_format = String(
        metadata={
            "label": "ZIM long description",
            "description": "ZIM long description. You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults "
            "to `{full_name} docs by DevDocs`",
        },
        data_key="long-description-format",
        validate=validate_zim_long_description,
    )

    tags = String(
        metadata={
            "label": "ZIM Tags",
            "description": "List of semi-colon-separated Tags for the ZIM file. "
            "You can use placeholders, see "
            "https://github.com/openzim/devdocs/blob/main/README.md. Defaults to "
            "`devdocs;{slug_without_version}`",
        }
    )

    creator = String(
        metadata={
            "label": "Creator",
            "description": "Name of content creator. “DevDocs” otherwise",
        },
    )

    publisher = String(
        metadata={
            "label": "Publisher",
            "description": "Custom publisher name (ZIM metadata). “openZIM” otherwise",
        },
    )

    # Defaults to `/output` on both load and dump; the value is additionally
    # checked by `validate_output`.
    output = String(
        metadata={
            "label": "Output folder",
            "placeholder": "/output",
            "description": "Output folder for ZIM file(s). Leave it as `/output`",
        },
        load_default="/output",
        dump_default="/output",
        validate=validate_output,
    )

    debug = fields.Boolean(
        truthy=[True],
        falsy=[False],
        metadata={"label": "Debug", "description": "Enable verbose output"},
    )

    devdocs_frontend_url = String(
        metadata={
            "label": "DevDocs frontend URL",
            "description": "Scheme and hostname for the devdocs frontend. "
            "Defaults to https://devdocs.io",
        },
        data_key="devdocs-frontend-url",
    )

    devdocs_documents_url = String(
        metadata={
            "label": "DevDocs documents URL",
            "description": "Scheme and hostname for the devdocs documents server. "
            "Defaults to https://documents.devdocs.io",
        },
        data_key="devdocs-documents-url",
    )
1 change: 1 addition & 0 deletions dispatcher/backend/src/utils/offliners.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
Offliner.nautilus: od("nautiluszim", True, False),
Offliner.zimit: od("zimit", True, "statsFilename"),
Offliner.kolibri: od("kolibri2zim", True, False),
Offliner.devdocs: od("devdocs2zim", True, False),
}


Expand Down
6 changes: 3 additions & 3 deletions dispatcher/frontend-ui/src/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -336,17 +336,17 @@ export default {
cancelable_statuses: cancelable_statuses,
running_statuses: running_statuses,
contact_email: "contact@kiwix.org",
categories: ["freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange",
categories: ["devdocs", "freecodecamp", "gutenberg", "ifixit", "other", "phet", "psiram", "stack_exchange",
"ted", "openedx", "vikidia", "wikibooks", "wikihow", "wikinews",
"wikipedia", "wikiquote", "wikisource", "wikispecies", "wikiversity",
"wikivoyage", "wiktionary"], // list of categories for filtering
warehouse_paths: ["/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange",
warehouse_paths: ["/devdocs", "/freecodecamp", "/gutenberg", "/ifixit", "/other", "/phet", "/psiram", "/stack_exchange",
"/ted", "/mooc", "/videos", "/vikidia", "/wikibooks", "/wikihow",
"/wikinews", "/wikipedia", "/wikiquote", "/wikisource",
"/wikiversity", "/wikivoyage", "/wiktionary", "/zimit",
"/.hidden/dev", "/.hidden/private", "/.hidden/endless",
"/.hidden/bard", "/.hidden/bsf", "/.hidden/custom_apps"],
offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", "freecodecamp"],
offliners: ["mwoffliner", "youtube", "phet", "gutenberg", "sotoki", "nautilus", "ted", "openedx", "zimit", "kolibri", "wikihow", "ifixit", "freecodecamp", "devdocs"],
periodicities: ["manually", "monthly", "quarterly", "biannualy", "annually"],
memory_values: [536870912, // 512MiB
1073741824, // 1GiB
Expand Down
4 changes: 3 additions & 1 deletion workers/app/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@
OFFLINER_WIKIHOW = "wikihow"
OFFLINER_IFIXIT = "ifixit"
OFFLINER_FREECODECAMP = "freecodecamp"
OFFLINER_DEVDOCS = "devdocs"

ALL_OFFLINERS = [
OFFLINER_MWOFFLINER,
Expand All @@ -137,6 +138,7 @@
OFFLINER_WIKIHOW,
OFFLINER_IFIXIT,
OFFLINER_FREECODECAMP,
OFFLINER_DEVDOCS,
]
SUPPORTED_OFFLINERS = [
offliner
Expand All @@ -152,7 +154,7 @@
OFFLINER_YOUTUBE,
]

ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted"]
ALL_PLATFORMS = ["wikimedia", "youtube", "wikihow", "ifixit", "ted", "devdocs"]
PLATFORMS_TASKS = {}
for platform in ALL_PLATFORMS:
name = f"PLATFORM_{platform}_MAX_TASKS"
Expand Down
3 changes: 2 additions & 1 deletion workers/contrib/zimfarm.config.example
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ ZIMFARM_CPU="3"
# Comma-separated list of offliners to run or `""` for all of them. If
# you want to run `youtube` tasks, you need to be whitelisted, contact
# us.
ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp"
ZIMFARM_OFFLINERS="mwoffliner,sotoki,gutenberg,phet,nautilus,ted,openedx,zimit,kolibri,wikihow,ifixit,freecodecamp,devdocs"

# Set to `"y"` to only run task specifically assigned to this worker
# (`""` otherwise)
Expand All @@ -66,4 +66,5 @@ DISABLE_IPV6=""
# PLATFORM_youtube_MAX_TASKS=2
# PLATFORM_wikihow_MAX_TASKS=2
# PLATFORM_ifixit_MAX_TASKS=2
# PLATFORM_devdocs_MAX_TASKS=2
# PLATFORM_ted_MAX_TASKS=2
1 change: 1 addition & 0 deletions workers/contrib/zimfarm.sh
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ function restart() {
--env PLATFORM_youtube_MAX_TASKS=$PLATFORM_youtube_MAX_TASKS \
--env PLATFORM_wikihow_MAX_TASKS=$PLATFORM_wikihow_MAX_TASKS \
--env PLATFORM_ifixit_MAX_TASKS=$PLATFORM_ifixit_MAX_TASKS \
--env PLATFORM_devdocs_MAX_TASKS=$PLATFORM_devdocs_MAX_TASKS \
--env PLATFORM_ted_MAX_TASKS=$PLATFORM_ted_MAX_TASKS \
--env POLL_INTERVAL=$POLL_INTERVAL \
--env DNSCACHE_IMAGE=$DNSCACHE_IMAGE \
Expand Down

0 comments on commit 32df240

Please sign in to comment.