Skip to content

Commit

Permalink
Improve whitespace handling to match old library
Browse files Browse the repository at this point in the history
  • Loading branch information
mbollmann committed Dec 23, 2024
1 parent d61571e commit 4bedd70
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 5 deletions.
10 changes: 5 additions & 5 deletions python/acl_anthology/text/markuptext.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def as_text(self) -> str:
The plain text with any markup stripped. The only transformation that will be performed is replacing TeX-math expressions with their corresponding Unicode representation, if possible.
"""
if isinstance(self._content, str):
return self._content
return remove_extra_whitespace(self._content)
if self._text is not None:
return self._text
element = deepcopy(self._content)
Expand All @@ -123,7 +123,7 @@ def as_html(self, allow_url: bool = True) -> str:
`<a href="...">` tags, but simply in `<span>` tags.
"""
if isinstance(self._content, str):
return xml_escape(self._content)
return xml_escape(remove_extra_whitespace(self._content))
if self._html is not None:
return self._html
element = deepcopy(self._content)
Expand Down Expand Up @@ -153,10 +153,10 @@ def as_latex(self) -> str:
if self._latex is not None:
return self._latex
if isinstance(self._content, str):
self._latex = latex_convert_quotes(latex_encode(self._content))
latex = latex_convert_quotes(latex_encode(self._content))
else:
latex = markup_to_latex(self._content)
self._latex = remove_extra_whitespace(latex)
self._latex = remove_extra_whitespace(latex)
return self._latex

def as_xml(self) -> str:
Expand All @@ -167,7 +167,7 @@ def as_xml(self) -> str:
if isinstance(self._content, str):
return xml_escape(self._content)
if self._xml is None:
self._xml = remove_extra_whitespace(stringify_children(self._content))
self._xml = stringify_children(self._content)
return self._xml

@classmethod
Expand Down
16 changes: 16 additions & 0 deletions python/tests/text/markuptext_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,22 @@
"latex": "Workshop on Topic A \\& B",
},
),
(
"Title with\n\n line breaks",
{
"text": "Title with line breaks",
"html": "Title with line breaks",
"latex": "Title with line breaks",
},
),
(
"<span>Title with\n\n line breaks</span>",
{
"text": "Title with line breaks",
"html": "<span>Title with line breaks</span>",
"latex": "Title with line breaks",
},
),
(
"<fixed-case>U</fixed-case>pstream <fixed-case>M</fixed-case>itigation <fixed-case>I</fixed-case>s <i><fixed-case>N</fixed-case>ot</i> <fixed-case>A</fixed-case>ll <fixed-case>Y</fixed-case>ou <fixed-case>N</fixed-case>eed",
{
Expand Down

0 comments on commit 4bedd70

Please sign in to comment.