From c36dd393d8102a8894d71a667ff9b6307f0974cc Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Mon, 8 Sep 2025 16:27:57 -0600 Subject: [PATCH] Add isIDCONT_lazy_if_safe() Various places in the code are using isWORDCHAR to match the continuation in an identifier. This mostly works, but the two sets are not identical, and the proper thing to do is to match continuation characters. The infrastructure was lacking this macro that would make it easy to do the right thing. This commit adds the infrastructure, leaving it to future commits to use it. A reasonably complete list of characters that differ between the two sets is: MIDDLE DOT GREEK YPOGEGRAMMENI GREEK ANO TELEIA COMBINING CYRILLIC HUNDRED THOUSANDS SIGN COMBINING CYRILLIC MILLIONS SIGN ARMENIAN MODIFIER LETTER LEFT HALF RING ARMENIAN EMPHASIS MARK NEW TAI LUE THAM DIGIT ONE COMBINING PARENTHESES OVERLAY COMBINING ENCLOSING CIRCLE COMBINING ENCLOSING CIRCLE BACKSLASH COMBINING ENCLOSING SCREEN COMBINING ENCLOSING UPWARD POINTING TRIANGLE MANDAIC LETTER AZ ESTIMATED SYMBOL CIRCLED LATIN CAPITAL LETTER A ... CIRCLED LATIN SMALL LETTER Z VERTICAL TILDE KATAKANA MIDDLE DOT COMBINING CYRILLIC TEN MILLIONS SIGN COMBINING CYRILLIC THOUSAND MILLIONS SIGN ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM ARABIC LIGATURE SHADDA WITH SUPERSCRIPT ALEF ISOLATED FORM ARABIC LIGATURE SALLALLAHOU ALAYHE WASALLAM ARABIC LIGATURE JALLAJALALOUHOU ARABIC FATHATAN ISOLATED FORM ARABIC DAMMATAN ISOLATED FORM ARABIC KASRATAN ISOLATED FORM ARABIC FATHA ISOLATED FORM ARABIC DAMMA ISOLATED FORM ARABIC KASRA ISOLATED FORM ARABIC SHADDA ISOLATED FORM ARABIC SUKUN ISOLATED FORM HALFWIDTH KATAKANA MIDDLE DOT SQUARED LATIN CAPITAL LETTER A ... SQUARED LATIN CAPITAL LETTER Z NEGATIVE CIRCLED LATIN CAPITAL LETTER A ... NEGATIVE CIRCLED LATIN CAPITAL LETTER Z NEGATIVE SQUARED LATIN CAPITAL LETTER A ... 
NEGATIVE SQUARED LATIN CAPITAL LETTER Z
---
 utf8.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/utf8.h b/utf8.h
index 0fe5036ae148..fdc29950378b 100644
--- a/utf8.h
+++ b/utf8.h
@@ -946,6 +946,16 @@ implementation of the latter. */
     ((IN_BYTES || !UTF) \
      ? isIDFIRST(*(p)) \
      : isIDFIRST_utf8_safe(p, e))
+/* Is the character starting at 'p' one that may continue an identifier?
+ * When IN_BYTES is true or 'UTF' is false, only the single byte *(p) is
+ * classified, using isIDCONT(); otherwise the decision is delegated to
+ * isIDCONT_utf8_safe(p, e).  'e' is used only in that second case.  'UTF'
+ * is parenthesized so that a compound expression argument is negated as a
+ * whole. */
+#define isIDCONT_lazy_if_safe(p, e, UTF) \
+    ((IN_BYTES || !(UTF)) \
+     ? isIDCONT(*(p)) \
+     : isIDCONT_utf8_safe(p, e))
 #define isWORDCHAR_lazy_if_safe(p, e, UTF) \
     ((IN_BYTES || !UTF) \
      ? isWORDCHAR(*(p)) \