Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added FTS support. #27

Merged
merged 1 commit into from
Sep 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ LABEL org.opencontainers.image.source https://github.com/openzim/devdocs
RUN apt-get update \
&& apt-get install -y --no-install-recommends \
libmagic1 \
libcairo2 \
&& rm -rf /var/lib/apt/lists/* \
&& python -m pip install --no-cache-dir -U \
pip
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ readme = "README.md"
dependencies = [
"requests==2.32.3",
"pydantic==2.8.2",
"zimscraperlib==3.4.0",
"zimscraperlib==4.0.0",
"Jinja2==3.1.3",
"beautifulsoup4==4.12.3",
]
dynamic = ["authors", "classifiers", "keywords", "license", "version", "urls"]

Expand Down
26 changes: 18 additions & 8 deletions src/devdocs2zim/generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from collections import defaultdict
from pathlib import Path

from bs4 import BeautifulSoup
from jinja2 import Environment, FileSystemLoader, select_autoescape
from pydantic import BaseModel
from zimscraperlib.constants import ( # pyright: ignore[reportMissingTypeStubs]
Expand All @@ -16,7 +17,11 @@
Creator,
StaticItem,
)
from zimscraperlib.zim.indexing import ( # pyright: ignore[reportMissingTypeStubs]
IndexData,
)

# pyright: ignore[reportMissingTypeStubs]
from devdocs2zim.client import (
DevdocsClient,
DevdocsIndex,
Expand Down Expand Up @@ -339,6 +344,7 @@ def load_common_files(self) -> list[StaticItem]:
content=app_css,
is_front=False,
mimetype="text/css",
auto_index=False,
)
)

Expand All @@ -353,6 +359,7 @@ def load_common_files(self) -> list[StaticItem]:
),
is_front=True,
mimetype="text/plain",
auto_index=False,
)
)

Expand Down Expand Up @@ -419,10 +426,6 @@ def generate_zim(
Illustration_48x48_at_1=self.logo_path.read_bytes(),
)

# Disable indexing because it won't be available in the JS frontend
# and causes significant performance issues with rendered sidebars.
creator.config_indexing(False)

# Start creator early to detect problems early.
with creator as started_creator:
logger.info(" Fetching the index...")
Expand Down Expand Up @@ -496,14 +499,16 @@ def add_zim_contents(
num_slashes = path.count("/")
rel_prefix = "../" * num_slashes

content = MISSING_PAGE
if path in db:
content = db.get(path)
else:
content = db.get(path, MISSING_PAGE)
if path not in db:
logger.warning(
f" DevDocs is missing content for {title!r} at {path!r}."
)

plain_content = " ".join(
BeautifulSoup(content, features="lxml").find_all(string=True)
)

# NOTE: Profiling indicates Jinja templating takes about twice as
# much CPU time as adding items without compression. This appears
# to be because of the navigation bar.
Expand All @@ -527,6 +532,11 @@ def add_zim_contents(
# navigation bar.
should_compress=True,
mimetype="text/html",
# Only index page content rather than navigation data.
index_data=IndexData(
title=title,
content=plain_content,
),
)

# Tracking metadata
Expand Down