Skip to content

Added FTS support. #27

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@ LABEL org.opencontainers.image.source https://github.com/openzim/devdocs
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libmagic1 \
libcairo2 \
&& rm -rf /var/lib/apt/lists/* \
&& python -m pip install --no-cache-dir -U \
pip
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -10,8 +10,9 @@ readme = "README.md"
dependencies = [
"requests==2.32.3",
"pydantic==2.8.2",
"zimscraperlib==3.4.0",
"zimscraperlib==4.0.0",
"Jinja2==3.1.3",
"beautifulsoup4==4.12.3",
]
dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]

26 changes: 18 additions & 8 deletions src/devdocs2zim/generator.py
Original file line number Diff line number Diff line change
@@ -5,6 +5,7 @@
from collections import defaultdict
from pathlib import Path

from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader, select_autoescape
from pydantic import BaseModel
from zimscraperlib.constants import ( # pyright: ignore[reportMissingTypeStubs]
@@ -16,7 +17,11 @@
Creator,
StaticItem,
)
from zimscraperlib.zim.indexing import ( # pyright: ignore[reportMissingTypeStubs]
IndexData,
)

# pyright: ignore[reportMissingTypeStubs]
from devdocs2zim.client import (
DevdocsClient,
DevdocsIndex,
@@ -339,6 +344,7 @@ def load_common_files(self) -> list[StaticItem]:
content=app_css,
is_front=False,
mimetype="text/css",
auto_index=False,
)
)

@@ -353,6 +359,7 @@ def load_common_files(self) -> list[StaticItem]:
),
is_front=True,
mimetype="text/plain",
auto_index=False,
)
)

@@ -419,10 +426,6 @@ def generate_zim(
Illustration_48x48_at_1=self.logo_path.read_bytes(),
)

# Disable indexing because it won't be available in the JS frontend
# and causes significant performance issues with rendered sidebars.
creator.config_indexing(False)

# Start creator early to detect problems early.
with creator as started_creator:
logger.info(" Fetching the index...")
@@ -496,14 +499,16 @@ def add_zim_contents(
num_slashes = path.count("/")
rel_prefix = "../" * num_slashes

content = MISSING_PAGE
if path in db:
content = db.get(path)
else:
content = db.get(path, MISSING_PAGE)
if path not in db:
logger.warning(
f" DevDocs is missing content for {title!r} at {path!r}."
)

plain_content = " ".join(
BeautifulSoup(content, features="lxml").find_all(string=True)
)

# NOTE: Profiling indicates Jinja templating takes about twice
# the CPU time as adding items without compression. This appears to
# be because of the navigation bar.
@@ -527,6 +532,11 @@ def add_zim_contents(
# navigation bar.
should_compress=True,
mimetype="text/html",
# Only index page content rather than navigation data.
index_data=IndexData(
title=title,
content=plain_content,
),
)

# Tracking metadata