From 0e313f759eaed237c710896f49583a9f2e58f7b6 Mon Sep 17 00:00:00 2001 From: Joseph Lewis III Date: Sat, 10 Aug 2024 12:32:27 -0700 Subject: [PATCH] Added generator logic. --- pyproject.toml | 3 +- src/devdocs2zim/assets/COPYRIGHT | 13 + src/devdocs2zim/assets/LICENSE | 373 +++++++++++++++++++++++++ src/devdocs2zim/assets/README.md | 7 + src/devdocs2zim/assets/devdocs_48.png | Bin 0 -> 1108 bytes src/devdocs2zim/client.py | 86 +++++- src/devdocs2zim/generator.py | 223 ++++++++++++++- src/devdocs2zim/templates/license.html | 17 ++ src/devdocs2zim/templates/page.html | 47 ++++ tests/test_basic.py | 7 + 10 files changed, 769 insertions(+), 7 deletions(-) create mode 100644 src/devdocs2zim/assets/COPYRIGHT create mode 100644 src/devdocs2zim/assets/LICENSE create mode 100644 src/devdocs2zim/assets/README.md create mode 100644 src/devdocs2zim/assets/devdocs_48.png create mode 100644 src/devdocs2zim/templates/license.html create mode 100644 src/devdocs2zim/templates/page.html create mode 100644 tests/test_basic.py diff --git a/pyproject.toml b/pyproject.toml index 8e27708..2bbad08 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "requests==2.32.3", "pydantic==2.8.2", "zimscraperlib==3.4.0", + "Jinja2==3.1.3", ] dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"] @@ -23,7 +24,7 @@ lint = [ "ruff==0.5.1", ] check = [ - "pyright==1.1.370", + "pyright==1.1.374", ] test = [ "pytest==8.2.2", diff --git a/src/devdocs2zim/assets/COPYRIGHT b/src/devdocs2zim/assets/COPYRIGHT new file mode 100644 index 0000000..9c520b8 --- /dev/null +++ b/src/devdocs2zim/assets/COPYRIGHT @@ -0,0 +1,13 @@ +Copyright 2013-2024 Thibaut Courouble and other contributors + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+ +Please do not use the name DevDocs to endorse or promote products +derived from this software without the maintainers' permission, except +as may be necessary to comply with the notice/attribution requirements. + +We also wish that any documentation file generated using this software +be attributed to DevDocs. Let's be fair to all contributors by giving +credit where credit's due. Thanks. diff --git a/src/devdocs2zim/assets/LICENSE b/src/devdocs2zim/assets/LICENSE new file mode 100644 index 0000000..a612ad9 --- /dev/null +++ b/src/devdocs2zim/assets/LICENSE @@ -0,0 +1,373 @@ +Mozilla Public License Version 2.0 +================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns Covered Software. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Software of a particular Contributor. + +1.4. "Covered Software" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Executable Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means + + (a) that the initial Contributor has attached the notice described + in Exhibit B to the Covered Software; or + + (b) that the Covered Software was made available under the terms of + version 1.1 or earlier of the License, but not also under the + terms of a Secondary License. + +1.6. "Executable Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines Covered Software with other material, in + a separate file or files, that is not Covered Software. + +1.8. "License" + means this document. + +1.9. 
"Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. "Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of Covered + Software; or + + (b) any new file in Source Code Form that contains any Covered + Software. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0, the GNU + Lesser General Public License, Version 2.1, the GNU Affero General + Public License, Version 3.0, or any later versions of those + licenses. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Software under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Software; + or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of Covered Software, or (ii) the combination of its + Contributions with other software (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by Covered Software in the absence of + its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Software under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any +Modifications that You create or to which You contribute, must be under +the terms of this License. You must inform recipients that the Source +Code Form of the Covered Software is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + +(a) such Covered Software must also be made available in Source Code + Form, as described in Section 3.1, and You must inform recipients of + the Executable Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Executable Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Executable Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Software. If the Larger Work is a combination of Covered +Software with a work governed by one or more Secondary Licenses, and the +Covered Software is not Incompatible With Secondary Licenses, this +License permits You to additionally distribute such Covered Software +under the terms of such Secondary License(s), so that the recipient of +the Larger Work may, at their option, further distribute the Covered +Software under the terms of either this License or such Secondary +License(s). + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Software, except that You may alter any license notices to +the extent required to remedy known factual inaccuracies. + +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of Covered +Software. 
However, You may do so only on Your own behalf, and not on +behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Software due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Software under this License. Except to the extent prohibited by statute +or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. 
Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Software under Section +2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* Covered Software is provided under this License on an "as is" * +* basis, without warranty of any kind, either expressed, implied, or * +* statutory, including, without limitation, warranties that the * +* Covered Software is free of defects, merchantable, fit for a * +* particular purpose or non-infringing. The entire risk as to the * +* quality and performance of the Covered Software is with You. * +* Should any Covered Software prove defective in any respect, You * +* (not any Contributor) assume the cost of any necessary servicing, * +* repair, or correction. This disclaimer of warranty constitutes an * +* essential part of this License. No use of any Covered Software is * +* authorized under this License except under this disclaimer. 
* +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Software as * +* permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. 
Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version +of the License under which You originally received the Covered Software, +or under the terms of any subsequent version published by the license +steward. + +10.3. Modified Versions + +If you create software not governed by this License, and you want to +create a new license for such software, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the Mozilla Public + License, v. 2.0. If a copy of the MPL was not distributed with this + file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. 
+ +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Mozilla Public License, v. 2.0. diff --git a/src/devdocs2zim/assets/README.md b/src/devdocs2zim/assets/README.md new file mode 100644 index 0000000..41abd43 --- /dev/null +++ b/src/devdocs2zim/assets/README.md @@ -0,0 +1,7 @@ +These files are copied from DevDocs: + + https://github.com/freeCodeCamp/devdocs + +devdocs_48.png is adapted from: + + https://github.com/freeCodeCamp/devdocs/blob/0dd0ad813f81d3c8e3d040095992e61b7398be96/public/images/icon-64.png diff --git a/src/devdocs2zim/assets/devdocs_48.png b/src/devdocs2zim/assets/devdocs_48.png new file mode 100644 index 0000000000000000000000000000000000000000..f8b5b77cb845298a38b22883c0cf27e175b91f81 GIT binary patch literal 1108 zcmX|BcTC$y6#fATqy$SuC@qbm#A(t$3ZaTxDS?!xKunmSKxt7zn>uX-W@9jB8k=GE zGGNRGvxhOpY@2O{F%U4uEVG9(+knzWja2#~OFik{y?grZ-Fx46x+F&jE0p#PZ3ID3 zHr5ztSRZ_xS2Vz4mp^Gk5cM~X_AZv{ut$)AfdM|B-`UyO*4D=1a2gsKs;a6=N=owc z^Dizgz{04mu5NE{2P8!D^72%{va&KRm)qCZ$6zoZQdd_8J~#yh1#C7Oz~HOE5Mi-c zy}i97BO_B&Qws|V8yg#YdwWWyQk4M!04^*nY-wqkoSalB6s4u5fQRFdRw9uc9v;@# z)^>Gu0j#2;VtRV|04x*=kB*M&>+1yq0Ysn?fGsU8LC*iHf?FUVbP9mh z)>g2pd%;;>Uw3zR_wexW_xBGA3yX`3TU}jEO-;?r%$%Q}hqR52jZi9__4W1j_V!*| zTbr4g+27wE8yhPxFAoh3&CSg{K0X$U#Xdeh$;rubxqNqbcW7t`hr?}eZv!EIettwE zkxHdzWn~>49FWQ6pr9a6PtWM+=<)IK+1c5&w6w^`$l~H+C^#Y_A}}y8I5;>bCnr8W zzPY)XKp+GJ1Z-|@f;3xOTR?tCN5}B+a7s!_Wo6~)=%`F41MQlcnwFQBNhA`J$&^Z^ zKoMvc8ylODkkH@XKR7s;k&%JN7?|F%cx7P$)oSc6Rp8&Q3^32s|6mnnt71>2wwOD$#*? 
zAeLZkfr3Y-MsRlc2{Dr|xPxC}gE4op9zB}tu~UD2Q%fmumfSJFa%bA;E#JeWndz;> zD+vL?oNBd|)osx1+>LLp^jV?LS670|i zT9#oRx^GFvj-Toq&k9XDt-I%ZQJy{ZMh!(Fsx~0Q52tQ{X8h%w4%D&E{ zjlOc#*Eu*6^ZkV^q<4@0(nYS%_z(}n~-~H6JP(d{L zz1U_uBD*S*HA?wiWcSccPq5+iyt)Z9#WQ{oEW|LH+$#gLL&Zrn<$l< mlT4iUpS|MSd=SeGMQ4x7Y(x*e?&0YlKL;C22MpT+8}lC~g=c>N literal 0 HcmV?d00001 diff --git a/src/devdocs2zim/client.py b/src/devdocs2zim/client.py index 8ee84a4..c6d1c65 100644 --- a/src/devdocs2zim/client.py +++ b/src/devdocs2zim/client.py @@ -1,3 +1,7 @@ +import re +from collections import defaultdict +from enum import Enum + import requests from pydantic import BaseModel, TypeAdapter @@ -5,6 +9,15 @@ HTTP_TIMEOUT_SECONDS = 15 +# These regular expressions are extracted from the DevDocs frontend. +# The expression definitions haven't changed in ~8 years as of 2024-07-28: +# https://github.com/freeCodeCamp/devdocs/blob/e28f81d3218bdbad7eac0540c58c11c7fe1d33d3/assets/javascripts/collections/types.js#L3 +BEFORE_CONTENT_PATTERN = re.compile( + r"(^|\()(guides?|tutorials?|reference|book|getting\ started|manual|examples)($|[\):])", # noqa: E501 + re.IGNORECASE, +) +AFTER_CONTENT_PATTERN = re.compile(r"appendix", re.IGNORECASE) + class DevdocsMetadataLinks(BaseModel): """Project links for a specific documentation set.""" @@ -74,7 +87,7 @@ class DevdocsIndexEntry(BaseModel): path: str # Name of the type (section) the entry is located under. - type: str + type: str | None @property def path_without_fragment(self) -> str: @@ -82,6 +95,16 @@ def path_without_fragment(self) -> str: return self.path.split("#")[0] +class SortPrecedence(Enum): + """Represents where to place section in the navbar.""" + + # NOTE: Definition order must match display order. + + BEFORE_CONTENT = 0 + CONTENT = 1 + AFTER_CONTENT = 2 + + class DevdocsIndexType(BaseModel): """A section header for documentation.""" @@ -94,6 +117,37 @@ class DevdocsIndexType(BaseModel): # Section slug. This appears to be unused. 
slug: str + def sort_precedence(self) -> SortPrecedence: + """Determines where this section should be displayed in the navigation.""" + if BEFORE_CONTENT_PATTERN.search(self.name): + return SortPrecedence.BEFORE_CONTENT + + if AFTER_CONTENT_PATTERN.search(self.name): + return SortPrecedence.AFTER_CONTENT + + return SortPrecedence.CONTENT + + +class NavigationSection: + """Represents a single section of a devdocs navigation tree.""" + + def __init__(self, section: DevdocsIndexType, links: list[DevdocsIndexEntry]): + """Initializes NavigationSection. + + Parameters: + section: Heading information for the group of links. + links: Links to display in the section. + """ + self.name = section.name + self.count = section.count + self.links = links + + self._contained_pages = {link.path_without_fragment for link in links} + + def contains_page(self, page_path: str) -> bool: + """Returns whether this section contains the given page.""" + return page_path in self._contained_pages + class DevdocsIndex(BaseModel): """Represents entries in the //index.json file for each resource.""" @@ -102,10 +156,36 @@ class DevdocsIndex(BaseModel): entries: list[DevdocsIndexEntry] # List of "types" or section headings. - # These are displayed mostly in order, except regular expressions are used to sort: - # https://github.com/freeCodeCamp/devdocs/blob/e28f81d3218bdbad7eac0540c58c11c7fe1d33d3/assets/javascripts/collections/types.js#L3 + # These are displayed in the order they're found grouped by sort_precedence.
types: list[DevdocsIndexType] + def build_navigation(self) -> list[NavigationSection]: + """Builds a navigation hierarchy that's sorted correctly for rendering.""" + + sections_by_precedence: dict[SortPrecedence, list[DevdocsIndexType]] = ( + defaultdict(list) + ) + for section in self.types: + sections_by_precedence[section.sort_precedence()].append(section) + + links_by_section_name: dict[str, list[DevdocsIndexEntry]] = defaultdict(list) + for entry in self.entries: + if entry.type is None: + continue + links_by_section_name[entry.type].append(entry) + + output: list[NavigationSection] = [] + for precedence in SortPrecedence: + for section in sections_by_precedence[precedence]: + output.append( + NavigationSection( + section=section, + links=links_by_section_name[section.name], + ) + ) + + return output + class DevdocsClient: """Utility functions to read data from devdocs.""" diff --git a/src/devdocs2zim/generator.py b/src/devdocs2zim/generator.py index c903a7d..e19b44c 100644 --- a/src/devdocs2zim/generator.py +++ b/src/devdocs2zim/generator.py @@ -1,20 +1,37 @@ +# ruff: noqa: S607 + import argparse +import datetime import os import re +import time from collections import defaultdict +from pathlib import Path +from jinja2 import Environment, FileSystemLoader, select_autoescape from pydantic import BaseModel from zimscraperlib.constants import ( # pyright: ignore[reportMissingTypeStubs] MAXIMUM_DESCRIPTION_METADATA_LENGTH, MAXIMUM_LONG_DESCRIPTION_METADATA_LENGTH, RECOMMENDED_MAX_TITLE_LENGTH, ) +from zimscraperlib.zim import ( # pyright: ignore[reportMissingTypeStubs] + Creator, + StaticItem, +) from devdocs2zim.client import ( DevdocsClient, + DevdocsIndex, DevdocsMetadata, ) -from devdocs2zim.constants import logger +from devdocs2zim.constants import LANGUAGE_ISO_639_3, NAME, ROOT_DIR, VERSION, logger + +# Content to display for pages missing from DevDocs. +MISSING_PAGE = ( + "

This documentation is missing.

" + "

This is an error with DevDocs, not your ZIM reader e.g. Kiwix.

" +) class InvalidFormatError(Exception): @@ -274,6 +291,62 @@ def __init__( os.makedirs(self.output_folder, exist_ok=True) + # jinja2 environment setup + self.env = Environment( # type: ignore + loader=FileSystemLoader(ROOT_DIR.joinpath("templates")), + autoescape=select_autoescape(), + ) + + self.page_template = self.env.get_template("page.html") # type: ignore + self.license_template = self.env.get_template("license.html") # type: ignore + + self.logo_path = self.asset_path("devdocs_48.png") + self.copyright_path = self.asset_path("COPYRIGHT") + self.license_path = self.asset_path("LICENSE") + + @staticmethod + def asset_path(name: str) -> Path: + """Returns the path to name in the assets folder. + + Raises ValueError if the resource doesn't exist. + """ + path = ROOT_DIR.joinpath("assets", name) + if not path.exists(): + raise ValueError(f"File not found at {path}") + return path + + def load_common_files(self) -> list[StaticItem]: + """Loads common assets for the output.""" + static_files: list[StaticItem] = [] + + logger.info("Fetching common CSS...") + app_css = self.devdocs_client.read_application_css() + logger.debug(f" Found app CSS with {len(app_css)} chars.") + static_files.append( + StaticItem( + path="application.css", + content=app_css, + is_front=False, + mimetype="text/css", + ) + ) + + static_files.append( + StaticItem( + # Documentation doesn't end in .html so this file won't + # conflict. + path="licenses.html", + content=self.license_template.render( # type: ignore + copyright=self.copyright_path.read_text(), + license=self.license_path.read_text(), + ), + is_front=True, + mimetype="text/html", + ) + ) + + return static_files + def run(self) -> None: """Run the generator to fetch content and produce ZIMs.""" @@ -281,8 +354,152 @@ def run(self) -> None: all_docs = self.devdocs_client.list_docs() selected_doc_metadata = self.doc_filter.filter(all_docs) + # Check formatting early to bail if any templates are invalid. 
+ for doc_metadata in selected_doc_metadata: + self.zim_config.format(doc_metadata.placeholders()) + + common_resources = self.load_common_files() + # List all docs and copy one by one for doc_metadata in selected_doc_metadata: - logger.info(f"Fetching {doc_metadata.slug}") + self.generate_zim( + doc_metadata, + common_resources, + ) + + def generate_zim( + self, doc_metadata: DevdocsMetadata, common_resources: list[StaticItem] + ): + """Generates a zim for a single document.""" + logger.info(f"Generating ZIM for {doc_metadata.slug}") + + formatted_config = self.zim_config.format(doc_metadata.placeholders()) + zim_path = Path(self.output_folder, f"{formatted_config.name_format}.zim") + + if zim_path.exists(): + logger.warning(f" Skipping, {zim_path} already exists.") + return + + logger.info(f" Writing to: {zim_path}") + + creator = Creator(zim_path, "index") + creator.config_metadata( + Name=formatted_config.name_format, + Title=formatted_config.title_format, + Publisher=formatted_config.publisher, + Date=datetime.datetime.now(tz=datetime.UTC).date(), + Creator=formatted_config.creator, + Description=formatted_config.description_format, + LongDescription=formatted_config.long_description_format, + Language=LANGUAGE_ISO_639_3, + Tags=formatted_config.tags, + Scraper=f"{NAME} v{VERSION}", + Illustration_48x48_at_1=self.logo_path.read_bytes(), + ) + + # Disable indexing because it won't be available in the JS frontend + # and causes significant performance issues with rendered sidebars. + creator.config_indexing(False) + + # Start creator early to detect problems early. 
+ with creator as started_creator: + logger.info(" Fetching the index...") + index = self.devdocs_client.get_index(doc_metadata.slug) + logger.debug(f" The index has {len(index.entries)} entries.") + + logger.info(" Fetching the document database...") + db = self.devdocs_client.get_db(doc_metadata.slug) + logger.debug(f" The database has {len(db)} entries.") + + self.add_zim_contents( + creator=started_creator, + doc_metadata=doc_metadata, + index=index, + db=db, + common_resources=common_resources, + ) + + def add_zim_contents( + self, + creator: Creator, + doc_metadata: DevdocsMetadata, + index: DevdocsIndex, + db: dict[str, str], + common_resources: list[StaticItem], + ): + """Adds the doc contents to the ZIM. + + Parameters: + creator: ZIM writer. + doc_metadata: Document metadata for generating common pages. + index: Documentation index for the navigation bar. + db: Mapping between documentation path and HTML content. + common_resources: Static content to add to the documentation. + """ - raise NotImplementedError("ZIM creation is not yet implemented") + logger.info(" Adding common resources...") + for resource in common_resources: + creator.add_item(resource) # type: ignore + + # Set the title for each page to the navigation item that opens the page + # to the top i.e. without a fragment if it exists. Otherwise, the first + # navigation item that opens the page. + page_to_title: dict[str, str] = {} + for entry in index.entries: + path_without_fragment = entry.path_without_fragment + if path_without_fragment == entry.path: + page_to_title[path_without_fragment] = entry.name + elif path_without_fragment not in page_to_title: + page_to_title[path_without_fragment] = entry.name + + # Explicitly inject the index.
+ page_to_title["index"] = f"{doc_metadata.name} Documentation" + + nav_sections = index.build_navigation() + + logger.info(f" Rendering {len(page_to_title)} pages...") + counter = 0 + render_delta = 0.0 + add_delta = 0.0 + for path, title in page_to_title.items(): + num_slashes = path.count("/") + rel_prefix = "../" * num_slashes + + start_render = time.time() + page_content = self.page_template.render( # type: ignore + rel_prefix=rel_prefix, + nav_sections=nav_sections, + devdocs_metadata=doc_metadata, + title=title, + path=path, + # Fill missing DevDocs content with indications that the issue + # isn't with this generator. + content=db.get(path, MISSING_PAGE), + ) + start_add = time.time() + creator.add_item_for( # type: ignore + path, + title=title, + content=page_content, # type: ignore + is_front=True, + # Compression is needed because images are embedded as Base64 and + # navigation is similar across pages. + should_compress=True, + mimetype="text/html", + ) + end = time.time() + + # Tracking metadata + render_delta += start_add - start_render + add_delta += end - start_add + counter += 1 + if counter % 100 == 0: + logger.debug( + f" Progress {counter} / {len(page_to_title)} pages " + f"({render_delta:0.2f}s rendering, {add_delta:0.2f}s adding)" + ) + + logger.info( + " Finished adding contents. " + f"({render_delta:0.2f}s rendering, {add_delta:0.2f}s adding)" + ) diff --git a/src/devdocs2zim/templates/license.html b/src/devdocs2zim/templates/license.html new file mode 100644 index 0000000..fd1f51c --- /dev/null +++ b/src/devdocs2zim/templates/license.html @@ -0,0 +1,17 @@ +{# Devdocs is an SPA so each page will have (nearly) identical content. #} + + + + Open-source License Information + + +

This work contains resources from DevDocs.io licensed under + the following license.

+ +

COPYRIGHT

+
{{ copyright }}
+ +

LICENSE

+
{{ license }}
+ + diff --git a/src/devdocs2zim/templates/page.html b/src/devdocs2zim/templates/page.html new file mode 100644 index 0000000..dac255b --- /dev/null +++ b/src/devdocs2zim/templates/page.html @@ -0,0 +1,47 @@ +{# Devdocs is an SPA so each page will have (nearly) identical content. #} + + + + {{title}} + + + + +
+ +
+
+ {{devdocs_metadata.name}} +
+ {% for section in nav_sections %} +
+ + + {{ section.count | safe}} + {{ section.name }} + + +
+ {% for link in section.links %} + + {{ link.name }} + + {% endfor %} +
+
+ {% endfor %} + Open-source Licenses +
+
+
+
+
+
{{ content | safe }}
+
+
+
+ + + diff --git a/tests/test_basic.py b/tests/test_basic.py new file mode 100644 index 0000000..39ac61e --- /dev/null +++ b/tests/test_basic.py @@ -0,0 +1,7 @@ +# pyright: strict, reportUnusedExpression=false + +from devdocs2zim.__about__ import __version__ + + +def test_version(): + assert "dev" in __version__