Skip to content

Commit

Permalink
ebook: fix ellipsis spacing
Browse files Browse the repository at this point in the history
  • Loading branch information
entorb committed Apr 27, 2024
1 parent a5533c2 commit 770186c
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 1 deletion.
19 changes: 18 additions & 1 deletion scripts/ebook/step_6.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,21 @@
target_file = Path("hpmor.html")


def fix_ellipsis(s: str) -> str:
    """
    Normalize the spacing around each ellipsis character for ebooks.

    The substitutions run in order: first every run of spaces touching an
    ellipsis is dropped, then a single space is put back where one is wanted.
    An ellipsis directly in front of punctuation is matched by neither of the
    re-insertion rules, so it keeps no space.
    """
    substitutions = (
        # strip all spaces on both sides of the ellipsis
        (r" *… *", "…"),
        # word…word: a space follows the ellipsis
        (r"(?<=[\w])…(?=[\w])", "… "),
        # after one of . ? ! : and before a word: a space precedes the ellipsis
        (r"(?<=[\.\?!:])…(?=[\w])", " …"),
    )
    for pattern, replacement in substitutions:
        s = re.sub(pattern, replacement, s)
    return s


if __name__ == "__main__":
print("=== 6. HTML modifications ===")

Expand Down Expand Up @@ -61,13 +76,15 @@
# remove trailing slashes to satisfy https://validator.w3.org
cont = cont.replace("<br />", "<br>")
cont = cont.replace("<hr />", "<hr>")

cont = re.sub(
r"(<meta [^>]*) />",
r"\1>",
cont,
)

# fix spaces around ellipsis
cont = fix_ellipsis(cont)

# remove bad span ids (containing spaces) from newspaper spans
cont = re.sub(r'<span id="[^"]+" label="[^"]+">', r"<span>", cont, count=5)

Expand Down
24 changes: 24 additions & 0 deletions scripts/ebook/step_6_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""Unit Tests.""" # noqa: INP001
# ruff: noqa: S101

from step_6 import fix_ellipsis

# (input, expected output) pairs for fix_ellipsis, grouped by situation
ellipsis_cases = (
    # quotations: no space between ellipsis and quote mark
    ("foo…”", "foo…”"),
    ("“…foo", "“…foo"),
    # html: no space between ellipsis and tag
    ("foo…</p>", "foo…</p>"),
    ("<p>…foo", "<p>…foo"),
    # between 2 words: exactly one space, after the ellipsis
    ("foo…bar", "foo… bar"),
    ("foo …bar", "foo… bar"),
    ("foo … bar", "foo… bar"),
    ("foo… bar", "foo… bar"),
    # start of sentence: one space before the ellipsis
    ("foo.…bar", "foo. …bar"),
    ("foo!…bar", "foo! …bar"),
    ("foo?…bar", "foo? …bar"),
    # end of sentence: input is returned unchanged
    ("foo…. bar", "foo…. bar"),
    ("foo…! bar", "foo…! bar"),
    ("foo…? bar", "foo…? bar"),
)

for given, expected in ellipsis_cases:
    assert fix_ellipsis(given) == expected

0 comments on commit 770186c

Please sign in to comment.