Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion embed.fnc
Original file line number Diff line number Diff line change
Expand Up @@ -6082,7 +6082,15 @@ S |int |intuit_method |NN char *start \
|NULLOK SV *ioname \
|NULLOK NOCHECK CV *cv
S |int |intuit_more |NN char *s \
|NN char *e
|NN char *e \
|U8 caller_context \
|NULLOK char *caller_s \
|Size_t caller_length
S |bool |is_existing_identifier \
|NN char *s \
|Size_t len \
|char sigil \
|bool is_utf8
S |I32 |lop |enum yytokentype t \
|I32 f \
|U8 x \
Expand Down
3 changes: 2 additions & 1 deletion embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -1684,7 +1684,8 @@
# define get_and_check_backslash_N_name_wrapper(a,b) S_get_and_check_backslash_N_name_wrapper(aTHX_ a,b)
# define incline(a,b) S_incline(aTHX_ a,b)
# define intuit_method(a,b,c) S_intuit_method(aTHX_ a,b,c)
# define intuit_more(a,b) S_intuit_more(aTHX_ a,b)
# define intuit_more(a,b,c,d,e) S_intuit_more(aTHX_ a,b,c,d,e)
# define is_existing_identifier(a,b,c,d) S_is_existing_identifier(aTHX_ a,b,c,d)
# define lop(a,b,c,d) S_lop(aTHX_ a,b,c,d)
# define missingterm(a,b) S_missingterm(aTHX_ a,b)
# define parse_ident(a,b,c,d,e,f) S_parse_ident(aTHX_ a,b,c,d,e,f)
Expand Down
7 changes: 6 additions & 1 deletion proto.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

99 changes: 93 additions & 6 deletions toke.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
#define XFAKEEOF 0x40
#define XFAKEBRACK 0x80

#define FROM_DOLLAR 1
#define FROM_SNAIL 2
#define FROM_PERCENT 3
#define FROM_IDENT 4
#define FROM_INTERDEPENDMAYBE 5

#ifdef USE_UTF8_SCRIPTS
# define UTF cBOOL(!IN_BYTES)
#else
Expand Down Expand Up @@ -4467,6 +4473,33 @@ S_scan_const(pTHX_ char *start)
return s;
}

STATIC bool
S_is_existing_identifier(pTHX_ char *s, Size_t len, char sigil, bool is_utf8)
{
PERL_ARGS_ASSERT_IS_EXISTING_IDENTIFIER;

/* This returns a boolean indicating if a string represents an identifier
* known to the program. 'sigil' is the character indicating the type of
* the identifier to look for. (though '%' is currently not specially
* handled.) The string from 's + 1' to (s + len) is looked at. s[0] is
* ignored, but must exist; the function overwrites it temporarily,
* restoring it before returning */

char save_sigil = s[0];
s[0] = sigil;
PADOFFSET slot = pad_findmy_pv(s, 0);
s[0] = save_sigil;

return slot != NOT_IN_PAD
|| gv_fetchpvn_flags(s + 1, len - 1,
(is_utf8) ? SVf_UTF8 : 0,
(sigil == '@')
? SVt_PVAV
: (sigil == '&')
? SVt_PVCV
: SVt_PV);
}

/* S_intuit_more
* Returns TRUE if there's more to the expression (e.g., a subscript),
* FALSE otherwise.
Expand All @@ -4490,7 +4523,11 @@ S_scan_const(pTHX_ char *start)
/* This is the one truly awful dwimmer necessary to conflate C and sed. */

STATIC int
S_intuit_more(pTHX_ char *s, char *e)
S_intuit_more(pTHX_ char *s, char *e,
U8 caller_context, /* Who's calling us? basically an enum */
char * caller_s, /* If non-NULL, the name of the identifier
that resulted in this call */
Size_t caller_length) /* And the length of that name */
{
PERL_ARGS_ASSERT_INTUIT_MORE;

Expand Down Expand Up @@ -4554,6 +4591,49 @@ S_intuit_more(pTHX_ char *s, char *e)
if (s[0] == ']' || s[0] == '^')
return FALSE;

bool under_strict_vars = PL_hints & HINT_STRICT_VARS;

/* If the input is of the form '$foo[...', and there is a $foo scalar and
* no @foo array, then '...' is more likely to be a character class.
* (Under 'strict vars', we know at compile time all the accessible
* variables, so in that case it MUST be a character class.) If the
* situation is reversed, it is more likely to be (or must be) a
* subscript. */
if (caller_context == FROM_DOLLAR) {
assert (caller_s);

/* See if there is a known scalar for the input identifier */
bool has_scalar = is_existing_identifier(caller_s, caller_length,
'$', UTF);

/* Repeat to see if there is a known array of the given name */
bool has_array = is_existing_identifier(caller_s, caller_length,
'@', UTF);

unsigned int count = has_scalar + has_array;

/* Under strict, we need some variable to be declared. */
if (under_strict_vars) {

/* If none are, is an error. Return false to stop useless further
* parsing. */
if (count == 0) {
return false;
}

/* When just one variable is declared, the construct has to match
* what the variable is. If it is an array, this must be a
* subscript which needs further processing; otherwise it is a
* character class needing nothing further. */
if (count == 1) {
return has_array;
}

/* Here have both an array and a scalar with the same name. Drop
* down to use the heuristics to try to intuit which is meant */
}
}

/* Find matching ']'. khw: This means any s[1] below is guaranteed to
* exist */
const char * const send = (char *) memchr(s, ']', e - s);
Expand Down Expand Up @@ -5386,7 +5466,9 @@ yyl_dollar(pTHX_ char *s)
s = skipspace(s);

if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
&& intuit_more(s, PL_bufend)) {
&& intuit_more(s, PL_bufend, FROM_DOLLAR,
PL_tokenbuf, strlen(PL_tokenbuf)))
{
if (*s == '[') {
PL_tokenbuf[0] = '@';
if (ckWARN(WARN_SYNTAX)) {
Expand Down Expand Up @@ -6090,7 +6172,9 @@ yyl_percent(pTHX_ char *s)
PREREF(PERLY_PERCENT_SIGN);
}
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
&& intuit_more(s, PL_bufend)) {
&& intuit_more(s, PL_bufend, FROM_PERCENT,
PL_tokenbuf, strlen(PL_tokenbuf)))
{
if (*s == '[')
PL_tokenbuf[0] = '@';
}
Expand Down Expand Up @@ -6712,7 +6796,8 @@ yyl_snail(pTHX_ char *s)
if (PL_lex_state == LEX_NORMAL || PL_lex_brackets)
s = skipspace(s);
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop)
&& intuit_more(s, PL_bufend))
&& intuit_more(s, PL_bufend, FROM_SNAIL,
PL_tokenbuf, strlen(PL_tokenbuf)))
{
if (*s == '{')
PL_tokenbuf[0] = '%';
Expand Down Expand Up @@ -9795,7 +9880,8 @@ Perl_yylex(pTHX)
return yylex();

case LEX_INTERPENDMAYBE:
if (intuit_more(PL_bufptr, PL_bufend)) {
if (intuit_more(PL_bufptr, PL_bufend, FROM_INTERDEPENDMAYBE, NULL, 0))
{
PL_lex_state = LEX_INTERPNORMAL; /* false alarm, more expr */
break;
}
Expand Down Expand Up @@ -10288,6 +10374,7 @@ S_parse_ident(pTHX_ char **s, char **d, char * const e, int allow_package,
bool is_utf8, bool check_dollar)
{
PERL_ARGS_ASSERT_PARSE_IDENT;
assert(*s <= PL_bufend);

while (*s < PL_bufend) {
if (*d >= e)
Expand Down Expand Up @@ -10608,7 +10695,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
}
else if ( PL_lex_state == LEX_INTERPNORMAL
&& !PL_lex_brackets
&& !intuit_more(s, PL_bufend))
&& !intuit_more(s, PL_bufend, FROM_IDENT, NULL, 0))
PL_lex_state = LEX_INTERPEND;
return s;
}
Expand Down
Loading