From 9c0b22c585c52edad484927e575fcd05682a8e2f Mon Sep 17 00:00:00 2001
From: Jeremy Schlatter
Date: Tue, 27 Aug 2024 12:04:39 -0700
Subject: [PATCH] ebook fix: remove stray markup from the text (#172)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before this commit, chapter 23 of the ebook had some spurious markup
that found its way into the readable text:

    And Harry brought out the original parchment with the hypotheses,
    and began scribbling.

    plus .5minus 1

    Observation:
    Wizardry isn’t as powerful now as it was when Hogwarts was founded.

    plus .5minus 1

You can see the source of this markup in the comment immediately above
the line changed in this commit:

    # \vskip 1\baselineskip plus .5\textheight minus 1\baselineskip

The problem is this regex:

    "\\vskip .*?\\baselineskip"

In the example above, it matches `"\vskip 1\baselineskip"`, stopping at
the first `"\baselineskip"` instead of the one at the end of the line.
This leaves the errant bit of markup in the text:

    plus .5\textheight minus 1\baselineskip

Which, because it does not start with a backslash, ends up inserted into
the content.

The fix is simple: remove the `'?'`, turning the `".*"` into a greedy
match instead of a minimal match. This matches to the end of the last
`"\baselineskip"`, completely removing this bit of markup from the text
as intended.
---
 scripts/ebook/step_3.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/ebook/step_3.py b/scripts/ebook/step_3.py
index 9aae4073b..f14a275aa 100755
--- a/scripts/ebook/step_3.py
+++ b/scripts/ebook/step_3.py
@@ -51,7 +51,7 @@
     cont = re.sub(r"\\clearpage(\{\}|)\n?", "", cont)
 
     # \vskip 1\baselineskip plus .5\textheight minus 1\baselineskip
-    cont = re.sub(r"\\vskip .*?\\baselineskip", "", cont)
+    cont = re.sub(r"\\vskip .*\\baselineskip", "", cont)
 
     # remove \settowidth{\versewidth}... \begin{verse}[\versewidth]
     cont = re.sub(