From 70ce8d4d7ae46cc371265492ad0aa327946157b8 Mon Sep 17 00:00:00 2001 From: Danny Lin Date: Tue, 26 Mar 2024 18:55:52 +0800 Subject: [PATCH] scripts: fix and improve compile_regexp - Support v flag for RegExp. - Fix pattern in a character class being incorrectly converted. --- scripts/build.py | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/scripts/build.py b/scripts/build.py index 87591ac9..097b8fd5 100644 --- a/scripts/build.py +++ b/scripts/build.py @@ -46,6 +46,7 @@ def escape_regex_slash(text): 'm': re.MULTILINE, 's': re.DOTALL, 'u': 0, + 'v': 0, 'y': 0, } JS_REGEXP_PATTERN_FIXER = re.compile( @@ -56,25 +57,59 @@ def escape_regex_slash(text): | \\u{(?P<braced_unicode_hex>[0-9A-Fa-f]+)} | - (?P<escape>\\.) + \[(?P<char_class>[^\\\]]*(?:\\.[^\\\]]*)*)\] + | + \\. + """, + flags=re.S + re.X, +) +JS_REGEXP_PATTERN_FIXER_CHAR_CLASS = re.compile( + r""" + \\u{(?P<braced_unicode_hex>[0-9A-Fa-f]+)} + | + \\. """, flags=re.S + re.X, ) def _compile_regexp_fixer(m, flags=''): - if m.group('escape'): - return m.group('escape') - elif m.group('named_group_def'): + try: + assert m.group('named_group_def') + except (AssertionError, IndexError): + pass + else: return rf"(?P<{m.group('named_group_def')}>" - elif m.group('named_group_ref'): + + try: + assert m.group('named_group_ref') + except (AssertionError, IndexError): + pass + else: return rf"(?P={m.group('named_group_ref')})" - elif m.group('braced_unicode_hex'): + + try: + assert m.group('braced_unicode_hex') + except (AssertionError, IndexError): + pass + else: if 'u' in flags: code = int(m.group('braced_unicode_hex'), 16) return rf'\u{code:04X}' if code <= 0xFFFF else rf'\U{code:08X}' else: return rf"u{{{m.group('braced_unicode_hex')}}}" + + try: + assert m.group('char_class') + except (AssertionError, IndexError): + pass + else: + subpattern = JS_REGEXP_PATTERN_FIXER_CHAR_CLASS.sub( + partial(_compile_regexp_fixer, flags=flags), + m.group('char_class'), + ) + return rf'[{subpattern}]' + return m.group(0)