Skip to content

Commit 642ed1f

Browse files
committed
Avoid escaping / and ?, these are unambiguous.
1 parent f2b7f8c commit 642ed1f

File tree

5 files changed

+15
-8
lines changed

5 files changed

+15
-8
lines changed

docs/source/changelog.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ Changelog
99
Version (dev)
1010
-------------
1111
* #35: Only use character escapes in FGD custom syntax mode. The original parser only allows `\\n`.
12+
* Avoid escaping `/` and `?`; these are unambiguous.
1213

1314
-------------
1415
Version 2.4.1

src/srctools/_tokenizer.pyx

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1071,7 +1071,8 @@ def escape_text(str text not None: str) -> str:
10711071
r"""Escape special characters and backslashes, so tokenising reproduces them.
10721072

10731073
This matches utilbuffer.cpp in the SDK.
1074-
The following characters are escaped: \n, \t, \v, \b, \r, \f, \a, \, /, ?, ', ".
1074+
The following characters are escaped: \n, \t, \v, \b, \r, \f, \a, \, ', ".
1075+
/ and ? are accepted as escapes, but not produced since they're unambiguous.
10751076
"""
10761077
# UTF8 = ASCII for the chars we care about, so we can just loop over the
10771078
# UTF8 data.
@@ -1114,8 +1115,6 @@ def escape_text(str text not None: str) -> str:
11141115
j = _write_escape(out_buff, j, b'f')
11151116
elif letter == b'\a':
11161117
j = _write_escape(out_buff, j, b'a')
1117-
elif letter == b'?':
1118-
j = _write_escape(out_buff, j, b'?')
11191118
elif letter == b'\\':
11201119
j = _write_escape(out_buff, j, b'\\')
11211120
elif letter == b'"':

src/srctools/tokenizer.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
Character escapes match `utilbuffer.cpp <https://github.com/ValveSoftware/source-sdk-2013/blob/0d8dceea4310fde5706b3ce1c70609d72a38efdf/sp/src/tier1/utlbuffer.cpp#L57-L69>`_ in the SDK.
1818
Specifically, the following characters are escaped:
1919
`\\\\n`, `\\\\t`, `\\\\v`, `\\\\b`, `\\\\r`, `\\\\f`, `\\\\a`, `\\`, `'` and `"`.
20+
`/` and `?` are accepted as escapes, but not produced since they're unambiguous.
2021
"""
2122
import re
2223
from typing import (
@@ -177,7 +178,10 @@ def has_value(self) -> bool:
177178
'?': '?',
178179
}
179180
ESCAPES_INV = {char: f'\\{sym}' for sym, char in ESCAPES.items()}
180-
ESCAPE_RE = re.compile('|'.join(map(re.escape, ESCAPES_INV)))
181+
ESCAPE_RE = re.compile('|'.join(
182+
re.escape(c) for c in ESCAPES_INV
183+
if c not in '?/'
184+
))
181185

182186
#: Characters not allowed for bare strings. These must be quoted.
183187
BARE_DISALLOWED: Final = frozenset('"\'{};,=[]()\r\n\t ')

tests/test_fgd/test_export_regressions_vanilla_.fgd

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@
1717
= npc_test: "Entity description, extending beyond 1000 characters: 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, " +
1818
"211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, 256, 257, 258, 259, 260, 261, 262, 263, 264, 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381, 382, 383, 384, 385, 386, 387, 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 399, 400, 401, 402, 403, 404, 405, 406, 407, 408, 409, 410, " +
1919
"411, 412, 413, 414, 415, 416, 417, 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499. Done!\n escapes: tab= , newline=\n, ''quoted'', bell="
20-
[ test_kv(color255) : "A test keyvalue" : "255 255 128" : "Help text for a keyvalue"
20+
[
21+
test_kv(color255) : "A test keyvalue" : "255 255 128" : "Help text for a keyvalue"
2122
spawnflags(flags) =
2223
[
2324
1: "[1] A" : 0

tests/test_tokenizer.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,8 @@
1919
IS_CPYTHON = platform.python_implementation() == 'CPython'
2020

2121
# See https://github.com/ValveSoftware/source-sdk-2013/blob/0d8dceea4310fde5706b3ce1c70609d72a38efdf/sp/src/tier1/utlbuffer.cpp#L57-L69
22-
ESCAPE_CHARS = "\n \t \v \b \r \f \a \\ ? \' \""
23-
ESCAPE_ENCODED = r"\n \t \v \b \r \f \a \\ \? \' " + r'\"'
22+
ESCAPE_CHARS = "\n \t \v \b \r \f \a \\ \' \""
23+
ESCAPE_ENCODED = r"\n \t \v \b \r \f \a \\ \' " + r'\"'
2424

2525
# The correct result of parsing prop_parse_test.
2626
# Either the token, or token + value (which must be correct).
@@ -535,6 +535,7 @@ def test_obj_config(py_c_token: Type[Tokenizer], parm: str, default: bool) -> No
535535
("\\thello_world", r"\\thello_world"),
536536
("\\ttest\nvalue\t\\r\t\n", r"\\ttest\nvalue\t\\r\t\n"),
537537
(ESCAPE_CHARS, ESCAPE_ENCODED),
538+
('Unchanged ?/', 'Unchanged ?/'),
538539
# BMP characters, and some multiplane chars.
539540
('test: ╒══╕', r'test: ╒══╕'),
540541
("♜♞🤐♝♛🥌 chess: ♚♝♞♜", "♜♞🤐♝♛🥌 chess: ♚♝♞♜"),
@@ -685,12 +686,13 @@ def test_allow_escapes(py_c_token: Type[Tokenizer]) -> None:
685686
Token.BRACE_CLOSE,
686687
])
687688
check_tokens(py_c_token(
688-
f'{{ "string\\n" "all escapes: {ESCAPE_ENCODED}" }}',
689+
f'{{ "string\\n" "all escapes: {ESCAPE_ENCODED}" "also accepted /?" }}',
689690
allow_escapes=True,
690691
), [
691692
Token.BRACE_OPEN,
692693
(Token.STRING, "string\n"),
693694
(Token.STRING, f'all escapes: {ESCAPE_CHARS}'),
695+
(Token.STRING, 'also accepted /?'),
694696
Token.BRACE_CLOSE,
695697
])
696698

0 commit comments

Comments
 (0)