Skip to content

Commit

Permalink
Escape underscores (#208)
Browse files Browse the repository at this point in the history
* IdentifierConverter: Add the option to not escape underscores

This allows generating symbols with subscripts (possibly nested), e.g. `x_2_i` becomes `x_{2_{i}}`. The option also makes it possible to generate invalid LaTeX when the identifier starts or ends with an underscore, or contains a double underscore. In these cases we now raise a ValueError.

* CodeGen: Add the plumbing to allow the use of the new `escape_underscores` option in IdentifierConverter

* IdentifierConverter: Ensure all subscripts are wrapped in braces

Previously, the generated LaTeX was incorrect for multi-character subscripts.
Tests are updated to reflect the new (correct) behaviour.

* IdentifierConverter: Simplify error checking for invalid LaTeX when escaping underscores

* IdentifierConverter: Fix too-long lines that flake8 complains about
  • Loading branch information
bjude authored Feb 13, 2025
1 parent f9c0797 commit 54dc869
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 30 deletions.
23 changes: 18 additions & 5 deletions src/latexify/codegen/algorithmic_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,11 @@ class AlgorithmicCodegen(ast.NodeVisitor):
_indent_level: int

def __init__(
self, *, use_math_symbols: bool = False, use_set_symbols: bool = False
self,
*,
use_math_symbols: bool = False,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.
Expand All @@ -33,11 +37,14 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._expression_codegen = expression_codegen.ExpressionCodegen(
use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols
use_math_symbols=use_math_symbols,
use_set_symbols=use_set_symbols,
escape_underscores=escape_underscores,
)
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols,
use_mathrm=False,
escape_underscores=escape_underscores,
)
self._indent_level = 0

Expand Down Expand Up @@ -192,7 +199,11 @@ class IPythonAlgorithmicCodegen(ast.NodeVisitor):
_indent_level: int

def __init__(
self, *, use_math_symbols: bool = False, use_set_symbols: bool = False
self,
*,
use_math_symbols: bool = False,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.
Expand All @@ -202,10 +213,12 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._expression_codegen = expression_codegen.ExpressionCodegen(
use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols
use_math_symbols=use_math_symbols,
use_set_symbols=use_set_symbols,
escape_underscores=escape_underscores,
)
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols
use_math_symbols=use_math_symbols, escape_underscores=escape_underscores
)
self._indent_level = 0

Expand Down
8 changes: 6 additions & 2 deletions src/latexify/codegen/expression_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,11 @@ class ExpressionCodegen(ast.NodeVisitor):
_compare_ops: dict[type[ast.cmpop], str]

def __init__(
self, *, use_math_symbols: bool = False, use_set_symbols: bool = False
self,
*,
use_math_symbols: bool = False,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.
Expand All @@ -28,7 +32,7 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols
use_math_symbols=use_math_symbols, escape_underscores=escape_underscores
)

self._bin_op_rules = (
Expand Down
7 changes: 5 additions & 2 deletions src/latexify/codegen/function_codegen.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ def __init__(
use_math_symbols: bool = False,
use_signature: bool = True,
use_set_symbols: bool = False,
escape_underscores: bool = True,
) -> None:
"""Initializer.
Expand All @@ -36,10 +37,12 @@ def __init__(
use_set_symbols: Whether to use set symbols or not.
"""
self._expression_codegen = expression_codegen.ExpressionCodegen(
use_math_symbols=use_math_symbols, use_set_symbols=use_set_symbols
use_math_symbols=use_math_symbols,
use_set_symbols=use_set_symbols,
escape_underscores=escape_underscores,
)
self._identifier_converter = identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols
use_math_symbols=use_math_symbols, escape_underscores=escape_underscores
)
self._use_signature = use_signature

Expand Down
38 changes: 36 additions & 2 deletions src/latexify/codegen/identifier_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,18 +17,28 @@ class IdentifierConverter:

_use_math_symbols: bool
_use_mathrm: bool
_escape_underscores: bool

def __init__(self, *, use_math_symbols: bool, use_mathrm: bool = True) -> None:
def __init__(
self,
*,
use_math_symbols: bool,
use_mathrm: bool = True,
escape_underscores: bool = True,
) -> None:
r"""Initializer.
Args:
use_math_symbols: Whether to convert identifiers with math symbol names to
appropriate LaTeX command.
use_mathrm: Whether to wrap the resulting expression by \mathrm, if
applicable.
escape_underscores: Whether to prefix any underscores in identifiers with
'\'. Disable to allow subscripts in the generated LaTeX.
"""
self._use_math_symbols = use_math_symbols
self._use_mathrm = use_mathrm
self._escape_underscores = escape_underscores

def convert(self, name: str) -> tuple[str, bool]:
"""Converts Python identifier to LaTeX expression.
Expand All @@ -41,14 +51,38 @@ def convert(self, name: str) -> tuple[str, bool]:
- latex: Corresponding LaTeX expression.
- is_single_character: Whether `latex` can be treated as a single
character or not.
Raises:
ValueError: Resulting LaTeX is not valid. This occurs when escape_underscores=False
and the identifier starts or ends with an underscore, or contains a double underscore.
"""
if not self._escape_underscores and "_" in name:
# Check if we are going to generate an invalid Latex string. Better to
# raise an exception here than have the resulting Latex fail to
# compile/display
name_splits = name.split("_")
if not all(name_splits):
raise ValueError(
"Neither preceding/trailing underscores nor double underscores is "
f"allowed by the `escape_underscores` option, but got: {name}"
)
elems = [
IdentifierConverter(
use_math_symbols=self._use_math_symbols,
use_mathrm=False,
escape_underscores=True,
).convert(n)[0]
for n in name_splits
]
# Wrap sub identifiers in nested braces
name = "_{".join(elems) + "}" * (len(elems) - 1)

if self._use_math_symbols and name in expression_rules.MATH_SYMBOLS:
return "\\" + name, True

if len(name) == 1 and name != "_":
return name, True

escaped = name.replace("_", r"\_")
escaped = name.replace("_", r"\_") if self._escape_underscores else name
wrapped = rf"\mathrm{{{escaped}}}" if self._use_mathrm else escaped

return wrapped, False
81 changes: 62 additions & 19 deletions src/latexify/codegen/identifier_converter_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,32 +8,75 @@


@pytest.mark.parametrize(
"name,use_math_symbols,use_mathrm,expected",
"name,use_math_symbols,use_mathrm,escape_underscores,expected",
[
("a", False, True, ("a", True)),
("_", False, True, (r"\mathrm{\_}", False)),
("aa", False, True, (r"\mathrm{aa}", False)),
("a1", False, True, (r"\mathrm{a1}", False)),
("a_", False, True, (r"\mathrm{a\_}", False)),
("_a", False, True, (r"\mathrm{\_a}", False)),
("_1", False, True, (r"\mathrm{\_1}", False)),
("__", False, True, (r"\mathrm{\_\_}", False)),
("a_a", False, True, (r"\mathrm{a\_a}", False)),
("a__", False, True, (r"\mathrm{a\_\_}", False)),
("a_1", False, True, (r"\mathrm{a\_1}", False)),
("alpha", False, True, (r"\mathrm{alpha}", False)),
("alpha", True, True, (r"\alpha", True)),
("foo", False, True, (r"\mathrm{foo}", False)),
("foo", True, True, (r"\mathrm{foo}", False)),
("foo", True, False, (r"foo", False)),
("a", False, True, True, ("a", True)),
("_", False, True, True, (r"\mathrm{\_}", False)),
("aa", False, True, True, (r"\mathrm{aa}", False)),
("a1", False, True, True, (r"\mathrm{a1}", False)),
("a_", False, True, True, (r"\mathrm{a\_}", False)),
("_a", False, True, True, (r"\mathrm{\_a}", False)),
("_1", False, True, True, (r"\mathrm{\_1}", False)),
("__", False, True, True, (r"\mathrm{\_\_}", False)),
("a_a", False, True, True, (r"\mathrm{a\_a}", False)),
("a__", False, True, True, (r"\mathrm{a\_\_}", False)),
("a_1", False, True, True, (r"\mathrm{a\_1}", False)),
("alpha", False, True, True, (r"\mathrm{alpha}", False)),
("alpha", True, True, True, (r"\alpha", True)),
("alphabet", True, True, True, (r"\mathrm{alphabet}", False)),
("foo", False, True, True, (r"\mathrm{foo}", False)),
("foo", True, True, True, (r"\mathrm{foo}", False)),
("foo", True, False, True, (r"foo", False)),
("aa", False, True, False, (r"\mathrm{aa}", False)),
("a_a", False, True, False, (r"\mathrm{a_{a}}", False)),
("a_1", False, True, False, (r"\mathrm{a_{1}}", False)),
("alpha", True, False, False, (r"\alpha", True)),
("alpha_1", True, False, False, (r"\alpha_{1}", False)),
("x_alpha", True, False, False, (r"x_{\alpha}", False)),
("x_alpha_beta", True, False, False, (r"x_{\alpha_{\beta}}", False)),
("alpha_beta", True, False, False, (r"\alpha_{\beta}", False)),
],
)
def test_identifier_converter(
name: str, use_math_symbols: bool, use_mathrm: bool, expected: tuple[str, bool]
name: str,
use_math_symbols: bool,
use_mathrm: bool,
escape_underscores: bool,
expected: tuple[str, bool],
) -> None:
assert (
identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols, use_mathrm=use_mathrm
use_math_symbols=use_math_symbols,
use_mathrm=use_mathrm,
escape_underscores=escape_underscores,
).convert(name)
== expected
)


@pytest.mark.parametrize(
"name,use_math_symbols,use_mathrm,escape_underscores",
[
("_", False, True, False),
("a_", False, True, False),
("_a", False, True, False),
("_1", False, True, False),
("__", False, True, False),
("a__", False, True, False),
("alpha_", True, False, False),
("_alpha", True, False, False),
("x__alpha", True, False, False),
],
)
def test_identifier_converter_failure(
name: str,
use_math_symbols: bool,
use_mathrm: bool,
escape_underscores: bool,
) -> None:
with pytest.raises(ValueError):
identifier_converter.IdentifierConverter(
use_math_symbols=use_math_symbols,
use_mathrm=use_mathrm,
escape_underscores=escape_underscores,
).convert(name)
2 changes: 2 additions & 0 deletions src/latexify/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class Config:
use_math_symbols: bool
use_set_symbols: bool
use_signature: bool
escape_underscores: bool

def merge(self, *, config: Config | None = None, **kwargs) -> Config:
"""Merge configuration based on old configuration and field values.
Expand Down Expand Up @@ -75,4 +76,5 @@ def defaults() -> Config:
use_math_symbols=False,
use_set_symbols=False,
use_signature=True,
escape_underscores=True,
)
3 changes: 3 additions & 0 deletions src/latexify/generate_latex.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,17 +66,20 @@ def get_latex(
return codegen.AlgorithmicCodegen(
use_math_symbols=merged_config.use_math_symbols,
use_set_symbols=merged_config.use_set_symbols,
escape_underscores=merged_config.escape_underscores,
).visit(tree)
elif style == Style.FUNCTION:
return codegen.FunctionCodegen(
use_math_symbols=merged_config.use_math_symbols,
use_signature=merged_config.use_signature,
use_set_symbols=merged_config.use_set_symbols,
escape_underscores=merged_config.escape_underscores,
).visit(tree)
elif style == Style.IPYTHON_ALGORITHMIC:
return codegen.IPythonAlgorithmicCodegen(
use_math_symbols=merged_config.use_math_symbols,
use_set_symbols=merged_config.use_set_symbols,
escape_underscores=merged_config.escape_underscores,
).visit(tree)

raise ValueError(f"Unrecognized style: {style}")

0 comments on commit 54dc869

Please sign in to comment.