Skip to content

Commit

Permalink
Add Tokenizer.plus_operator option
Browse files Browse the repository at this point in the history
  • Loading branch information
TeamSpen210 committed May 24, 2024
1 parent 1640689 commit 40c662f
Show file tree
Hide file tree
Showing 6 changed files with 97 additions and 69 deletions.
3 changes: 3 additions & 0 deletions docs/source/changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ Version (dev)
allow reporting the source location in exceptions.
* :py:meth:`Keyvalues.export() <srctools.keyvalues.Keyvalues.export>` is now deprecated.
* Allow directly passing enums to set VMF keyvalues and fixups, if the ``value`` is itself a valid value.
* Add :py:attr:`~srctools.tokenizer.Tokenizer.plus_operator`, allowing ``+`` to be
parsed as an operator for FGDs but still be valid inside bare strings elsewhere.
These are common in ``gameinfo.txt``.

--------------
Version 2.3.17
Expand Down
53 changes: 29 additions & 24 deletions src/srctools/_tokenizer.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ cdef:

# Characters not allowed for bare names on a line.
# TODO: Make this an actual constant value, but that won't do the switch optimisation.
DEF BARE_DISALLOWED = b'"\'{};,=+[]()\r\n\t '
DEF BARE_DISALLOWED = b'"\'{};,=[]()\r\n\t '

# Pack flags into a bitfield.
cdef extern from *:
Expand All @@ -78,6 +78,7 @@ struct TokFlags {
unsigned char string_brackets: 1;
unsigned char allow_escapes: 1;
unsigned char colon_operator: 1;
unsigned char plus_operator: 1;
unsigned char allow_star_comments: 1;
unsigned char preserve_comments: 1;
unsigned char file_input: 1;
Expand All @@ -88,6 +89,7 @@ struct TokFlags {
bint string_brackets
bint allow_escapes
bint colon_operator
bint plus_operator
bint allow_star_comments
bint preserve_comments
# If set, the file_iter is a bound read() method.
Expand Down Expand Up @@ -419,11 +421,12 @@ cdef class Tokenizer(BaseTokenizer):
object filename=None,
error=None,
*,
bint string_bracket=False,
bint allow_escapes=True,
bint allow_star_comments=False,
bint preserve_comments=False,
bint colon_operator=False,
bint string_bracket: bool = False,
bint allow_escapes: bool = True,
bint allow_star_comments: bool = False,
bint preserve_comments: bool = False,
bint colon_operator: bool = False,
bint plus_operator: bool = False,
):
# Early warning for this particular error.
if isinstance(data, bytes) or isinstance(data, bytearray):
Expand All @@ -438,6 +441,7 @@ cdef class Tokenizer(BaseTokenizer):
'allow_star_comments': allow_star_comments,
'preserve_comments': preserve_comments,
'colon_operator': colon_operator,
'plus_operator': plus_operator,
'file_input': 0,
'last_was_cr': 0,
}
Expand Down Expand Up @@ -492,60 +496,61 @@ cdef class Tokenizer(BaseTokenizer):

@property
def string_bracket(self) -> bool:
"""Check if [bracket] blocks are parsed as a single string-like block.
"""Controls whether [bracket] blocks are parsed as a single string-like block.

If disabled these are parsed as BRACK_OPEN, STRING, BRACK_CLOSE.
"""
return self.flags.string_brackets

@string_bracket.setter
def string_bracket(self, bint value) -> None:
"""Set if [bracket] blocks are parsed as a single string-like block.

If disabled these are parsed as BRACK_OPEN, STRING, BRACK_CLOSE.
"""
self.flags.string_brackets = value

@property
def allow_escapes(self) -> bool:
"""Check if backslash escapes will be parsed."""
"""Controls whether backslash escapes will be parsed."""
return self.flags.allow_escapes

@allow_escapes.setter
def allow_escapes(self, bint value) -> None:
"""Set if backslash escapes will be parsed."""
self.flags.allow_escapes = value

@property
def allow_star_comments(self) -> bool:
"""Check if /**/ style comments will be enabled."""
"""Controls whether /**/ style comments will be enabled."""
return self.flags.allow_star_comments

@allow_star_comments.setter
def allow_star_comments(self, bint value) -> None:
"""Set if /**/ style comments are enabled."""
self.flags.allow_star_comments = value

@property
def preserve_comments(self) -> bool:
"""Check if comments will be output as tokens."""
"""Controls whether comments will be output as tokens."""
return self.flags.preserve_comments

@preserve_comments.setter
def preserve_comments(self, bint value) -> None:
"""Set if comments will be output as tokens."""
self.flags.preserve_comments = value

@property
def colon_operator(self) -> bool:
"""Check if : characters are treated as a COLON token, or part of strings."""
"""Controls whether : characters are treated as a COLON token, or part of strings."""
return self.flags.colon_operator

@colon_operator.setter
def colon_operator(self, bint value) -> None:
"""Set if : characters are treated as a COLON token, or part of strings."""
self.flags.colon_operator = value

@property
def plus_operator(self) -> bool:
    """Controls whether + characters are treated as a PLUS token, or part of strings."""
    return self.flags.plus_operator

@plus_operator.setter
def plus_operator(self, bint value) -> None:
    """Set if + characters are treated as a PLUS token, or part of strings."""
    self.flags.plus_operator = value

cdef inline bint buf_reset(self) except False:
"""Reset the temporary buffer."""
# Don't bother resizing or clearing, the next append will overwrite.
Expand Down Expand Up @@ -669,8 +674,6 @@ cdef class Tokenizer(BaseTokenizer):
return BRACE_OPEN_TUP
elif next_char == b'}':
return BRACE_CLOSE_TUP
elif next_char == b'+':
return PLUS_TUP
elif next_char == b'=':
return EQUALS_TUP
elif next_char == b',':
Expand Down Expand Up @@ -896,6 +899,8 @@ cdef class Tokenizer(BaseTokenizer):
else: # These complex checks can't be in a switch, so we need to nest this.
if next_char == b':' and self.flags.colon_operator:
return COLON_TUP
if next_char == b'+' and self.flags.plus_operator:
return PLUS_TUP
# Bare names
if next_char not in BARE_DISALLOWED:
self.buf_reset()
Expand All @@ -908,12 +913,12 @@ cdef class Tokenizer(BaseTokenizer):
return STRING, self.buf_get_text()

elif (
next_char in BARE_DISALLOWED or
(next_char == b':' and self.flags.colon_operator)
next_char in BARE_DISALLOWED
or (next_char == b':' and self.flags.colon_operator)
or (next_char == b'+' and self.flags.plus_operator)
): # We need to repeat this so we return the ending
# char next. If it's not allowed, that'll error on
# next call.
# We need to repeat this so we return the newline.
self.char_index -= 1
return STRING, self.buf_get_text()
else:
Expand Down
1 change: 1 addition & 0 deletions src/srctools/fgd.py
Original file line number Diff line number Diff line change
Expand Up @@ -2192,6 +2192,7 @@ def parse_file(
error=FGDParseError,
string_bracket=False,
colon_operator=True,
plus_operator=True,
)
for token, token_value in tokeniser:
# The only things at top-level would be bare strings, and empty lines.
Expand Down
21 changes: 16 additions & 5 deletions src/srctools/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,9 +116,9 @@ class Token(Enum):
BRACK_OPEN = 12 #: A ``[`` character. Only used if ``PROP_FLAG`` is not.
BRACK_CLOSE = 13 #: A ``]`` character.

COLON = 14 #: A ``:`` character.
COLON = 14 #: A ``:`` character, if :py:attr:`~Tokenizer.colon_operator` is enabled.
EQUALS = 15 #: A ``=`` character.
PLUS = 16 #: A ``+`` character.
PLUS = 16 #: A ``+`` character, if :py:attr:`~Tokenizer.plus_operator` is enabled.
COMMA = 17 #: A ``,`` character.

@property
Expand Down Expand Up @@ -149,7 +149,6 @@ def has_value(self) -> bool:
'}': Token.BRACE_CLOSE,

'=': Token.EQUALS,
'+': Token.PLUS,
',': Token.COMMA,
}

Expand All @@ -167,7 +166,7 @@ def has_value(self) -> bool:
}

#: Characters not allowed for bare strings. These must be quoted.
BARE_DISALLOWED: Final = frozenset('"\'{};,=+[]()\r\n\t ')
BARE_DISALLOWED: Final = frozenset('"\'{};,=[]()\r\n\t ')


class BaseTokenizer(abc.ABC):
Expand Down Expand Up @@ -389,6 +388,8 @@ class Tokenizer(BaseTokenizer):
* preserve_comments causes :py:const:`Token.COMMENT` tokens to be produced.
* colon_operator controls if ``:`` produces :py:const:`~Token.COLON` tokens, or is treated as
a bare string.
* plus_operator controls if ``+`` produces :py:const:`~Token.PLUS` tokens, or is treated as
a bare string.
"""
chunk_iter: Iterator[str]
cur_chunk: str
Expand All @@ -398,6 +399,7 @@ class Tokenizer(BaseTokenizer):
allow_star_comments: bool
preserve_comments: bool
colon_operator: bool
plus_operator: bool
_last_was_cr: bool

def __init__(
Expand All @@ -411,6 +413,7 @@ def __init__(
allow_star_comments: bool = False,
preserve_comments: bool = False,
colon_operator: bool = False,
plus_operator: bool = False,
) -> None:
# If a file-like object, automatically use the configured name.
if filename is None and hasattr(data, 'name'):
Expand Down Expand Up @@ -440,6 +443,7 @@ def __init__(
self.allow_escapes = bool(allow_escapes)
self.allow_star_comments = bool(allow_star_comments)
self.colon_operator = bool(colon_operator)
self.plus_operator = bool(plus_operator)
self.preserve_comments = bool(preserve_comments)
self._last_was_cr = False

Expand Down Expand Up @@ -552,6 +556,9 @@ def _get_token(self) -> Tuple[Token, str]:
elif next_char == ':' and self.colon_operator:
return Token.COLON, ':'

elif next_char == '+' and self.plus_operator:
return Token.PLUS, '+'

elif next_char == ']':
if self.string_bracket:
# If string_bracket is set (using PROP_FLAG), this is a
Expand Down Expand Up @@ -582,7 +589,11 @@ def _get_token(self) -> Tuple[Token, str]:
value_chars = [next_char]
while True:
next_char = self._next_char()
if next_char in BARE_DISALLOWED or (next_char == ':' and self.colon_operator):
if (
next_char in BARE_DISALLOWED
or (next_char == ':' and self.colon_operator)
or (next_char == '+' and self.plus_operator)
):
# We need to repeat this, so we return the ending char next.
# If it's not allowed, that'll error on next call.
self.char_index -= 1
Expand Down
24 changes: 12 additions & 12 deletions tests/test_keyvalues.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def test_names() -> None:
"Extra" "Spaces"
// "Commented" "out"
"Block" {
"Empty"
Empty
{
} }
"Block" // "with value"
Expand All @@ -133,7 +133,7 @@ def test_names() -> None:
"Root2"
{
"Name with \\" in it" "Value with \\" inside"
"multiline" "text
multi+line "text
\tcan continue
for many \\"lines\\" of
possibly indented
Expand All @@ -151,8 +151,8 @@ def test_names() -> None:
"Flag" "blocksthis" [!test_enabled]
"Replaced" "shouldbe"
"Replaced" "toreplace" [test_enabled]
"Replaced" "alsothis" [test_enabled]
"Replaced" to+replace [test_enabled]
"Replaced" also+this [test_enabled]
"Replaced" "shouldbe2"
"Replaced" "toreplace2" [!test_disabled]
Expand All @@ -165,8 +165,8 @@ def test_names() -> None:
}
"Replaced" [test_enabled]
{
"lambda" "should"
"replace" "above"
lambda should
replace above
}
"Replaced"
Expand Down Expand Up @@ -208,7 +208,7 @@ def test_names() -> None:
]),
P('Root2', [
P('Name with " in it', 'Value with \" inside'),
P('multiline',
P('multi+line',
'text\n\tcan continue\nfor many "lines" of\n possibly indented\n\ntext'
),
# Note, invalid = unchanged.
Expand All @@ -219,8 +219,8 @@ def test_names() -> None:
P('after ', 'value'),
P('Flag', 'allowed'),
P('FlagAllows', 'This'),
P('Replaced', 'toreplace'),
P('Replaced', 'alsothis'),
P('Replaced', 'to+replace'),
P('Replaced', 'also+this'),
P('Replaced', 'toreplace2'),
P('Replaced', 'alsothis2'),
P('Replaced', [
Expand Down Expand Up @@ -284,7 +284,7 @@ def test_build() -> None:
b.block('he\tre')
with b.Root2:
b['Name with " in it']('Value with \" inside')
b.multiline(
b['multi+line'](
'text\n\tcan continue\nfor many "lines" of\n possibly '
'indented\n\ntext'
)
Expand All @@ -297,8 +297,8 @@ def test_build() -> None:
b['after ']('value')
b.Flag('allowed')
b.FlagAllows('This')
b.Replaced('toreplace')
b.Replaced('alsothis')
b.Replaced('to+replace')
b.Replaced('also+this')
b.Replaced('toreplace2')
b.Replaced('alsothis2')
with b.Replaced:
Expand Down
Loading

0 comments on commit 40c662f

Please sign in to comment.