Skip to content

Commit

Permalink
scripts: fix and improve compile_regexp
Browse files Browse the repository at this point in the history
- Support v flag for RegExp.
- Fix patterns inside a character class being incorrectly converted.
  • Loading branch information
danny0838 committed Mar 26, 2024
1 parent 564d85a commit 70ce8d4
Showing 1 changed file with 41 additions and 6 deletions.
47 changes: 41 additions & 6 deletions scripts/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ def escape_regex_slash(text):
'm': re.MULTILINE,
's': re.DOTALL,
'u': 0,
'v': 0,
'y': 0,
}
JS_REGEXP_PATTERN_FIXER = re.compile(
Expand All @@ -56,25 +57,59 @@ def escape_regex_slash(text):
|
\\u{(?P<braced_unicode_hex>[0-9A-Fa-f]+)}
|
(?P<escape>\\.)
\[(?P<char_class>[^\\\]]*(?:\\.[^\\\]]*)*)\]
|
\\.
""",
flags=re.S + re.X,
)
# Constructs that are rewritten inside the body of a character class: a braced
# code point escape (hex digits captured as ``braced_unicode_hex``) or any
# other backslash escape, which is consumed as a two-character unit.
# The pattern text is kept flush-left so the verbose-mode string is unchanged.
JS_REGEXP_PATTERN_FIXER_CHAR_CLASS = re.compile(
    r"""
\\u{(?P<braced_unicode_hex>[0-9A-Fa-f]+)}
|
\\.
""",
    flags=re.DOTALL | re.VERBOSE,
)


def _compile_regexp_fixer(m, flags=''):
    """Return the Python ``re`` equivalent of one matched JavaScript construct.

    Intended as a replacement callback for ``re.sub``-style substitution.
    Besides whatever top-level pattern dispatches here (not visible in this
    block), the function re-enters itself through
    ``JS_REGEXP_PATTERN_FIXER_CHAR_CLASS`` for the body of a character class.
    That pattern defines only some of the named groups, so every group lookup
    must tolerate a group name the matching pattern does not define.

    Args:
        m: Match object for the construct being rewritten.
        flags: JavaScript RegExp flag characters (e.g. ``'gu'``).

    Returns:
        The replacement text, or the matched text unchanged when the
        construct needs no rewriting.
    """
    def group(name):
        # ``Match.group`` raises IndexError for a name the pattern does not
        # define and returns None for a group that did not participate.
        # Both cases mean "this construct is not present".
        try:
            return m.group(name)
        except IndexError:
            return None

    # Explicit truthiness checks replace the former ``assert``-based control
    # flow, which silently changed behaviour under ``python -O``.
    group_name = group('named_group_def')
    if group_name:
        return rf"(?P<{group_name}>"

    ref_name = group('named_group_ref')
    if ref_name:
        return rf"(?P={ref_name})"

    hex_digits = group('braced_unicode_hex')
    if hex_digits:
        # ``\u{...}`` is a code point escape only in Unicode-aware mode,
        # which JavaScript enables with either the ``u`` or the ``v`` flag.
        if 'u' in flags or 'v' in flags:
            code = int(hex_digits, 16)
            return rf'\u{code:04X}' if code <= 0xFFFF else rf'\U{code:08X}'
        # Otherwise the backslash is an identity escape: a literal ``u``
        # followed by whatever the braces mean in that position.
        return rf"u{{{hex_digits}}}"

    char_class = group('char_class')
    if char_class:
        # Inside a character class only escapes are rewritten; group syntax
        # is literal text there and must be left untouched.
        subpattern = JS_REGEXP_PATTERN_FIXER_CHAR_CLASS.sub(
            partial(_compile_regexp_fixer, flags=flags),
            char_class,
        )
        return rf'[{subpattern}]'

    return m.group(0)


Expand Down

0 comments on commit 70ce8d4

Please sign in to comment.