@@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
101
101
#define XFAKEEOF 0x40
102
102
#define XFAKEBRACK 0x80
103
103
104
+ #define FROM_DOLLAR 1
105
+ #define FROM_SNAIL 2
106
+ #define FROM_PERCENT 3
107
+ #define FROM_IDENT 4
108
+ #define FROM_INTERDEPENDMAYBE 5
109
+
104
110
#ifdef USE_UTF8_SCRIPTS
105
111
# define UTF cBOOL(!IN_BYTES)
106
112
#else
@@ -4547,7 +4553,7 @@ S_is_existing_identifier(pTHX_ char *s, char *e, char sigil, bool is_utf8)
4547
4553
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
4548
4554
4549
4555
STATIC int
4550
- S_intuit_more (pTHX_ char * s , char * e )
4556
+ S_intuit_more (pTHX_ char * s , char * e , U8 caller_context )
4551
4557
{
4552
4558
PERL_ARGS_ASSERT_INTUIT_MORE ;
4553
4559
@@ -4608,23 +4614,47 @@ S_intuit_more(pTHX_ char *s, char *e)
4608
4614
if (s [0 ] == ']' || s [0 ] == '^' )
4609
4615
return FALSE;
4610
4616
4611
- /* khw: If the context of this call is $foo[...], we may be able to avoid
4612
- * the heuristics below. The possibilities are:
4613
- * strict @foo $foo
4614
- * vars? exists exists
4615
- * y n n This is an error; return false now
4616
- * y n y must be a a charclass
4617
- * y y n must be a a subscript
4618
- * y y y ambiguous; do heuristics below
4619
- * n n n ambiguous; do heuristics below
4620
- * n n y ambiguous; do heuristics below, but I
4621
- * wonder if the initial bias should be a
4622
- * little towards charclass
4623
- * n y n ambiguous; do heuristics below, but I
4624
- * wonder if the initial bias should be a
4625
- * little towards subscript
4626
- * n y y ambiguous; do heuristics below
4627
- */
4617
+
4618
+ /* If the input is of the form '$foo[...', and there is a $foo scalar and
4619
+ * no @foo array, then '...' is more likely to be a character class.
4620
+ * (Under 'strict vars', we know at compile time all the accessible
4621
+ * variables, so in that case it MUST be a character class.) If the
4622
+ * situation is reversed, it is more likely or must be a subscript */
4623
+ if ( caller_context == FROM_DOLLAR
4624
+ || (caller_context == FROM_INTERDEPENDMAYBE && PL_tokenbuf [0 ] == '@' ))
4625
+ {
4626
+ char * e = PL_tokenbuf + sizeof (PL_tokenbuf ) + 1 ;
4627
+
4628
+ /* See if there is a known scalar for what our caller is asking about.
4629
+ * */
4630
+ bool has_scalar = is_existing_identifier (PL_tokenbuf , e , '$' , UTF );
4631
+
4632
+ /* Repeat to see if there is a known array of the given name */
4633
+ bool has_array = is_existing_identifier (PL_tokenbuf , e , '@' , UTF );
4634
+
4635
+ unsigned int count = has_scalar + has_array ;
4636
+
4637
+ /* Under strict, we need some variable to be declared. */
4638
+ if (PL_hints & HINT_STRICT_VARS ) {
4639
+
4640
+ /* If none are, is an error, return false to stop useless further
4641
+ * parsing. */
4642
+ if (count == 0 ) {
4643
+ return false;
4644
+ }
4645
+
4646
+ /* When just one variable is declared, the construct has to match
4647
+ * what the variable is. If it is an array, this must be a
4648
+ * subscript which needs further processing; otherwise it is a
4649
+ * character class needing nothing further. */
4650
+ if (count == 1 ) {
4651
+ return has_array ;
4652
+ }
4653
+
4654
+ /* Here have both an array and a scalar with the same name. Drop
4655
+ * down to use the heuristics to try to intuit which is meant */
4656
+ }
4657
+ }
4628
4658
4629
4659
/* Find matching ']'. khw: Actually it finds the next ']' and assumes it
4630
4660
* matches the '['. In order to account for the possibility of the ']'
@@ -5585,7 +5615,7 @@ yyl_dollar(pTHX_ char *s)
5585
5615
s = skipspace (s );
5586
5616
5587
5617
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
5588
- && intuit_more (s , PL_bufend )) {
5618
+ && intuit_more (s , PL_bufend , FROM_DOLLAR )) {
5589
5619
if (* s == '[' ) {
5590
5620
PL_tokenbuf [0 ] = '@' ;
5591
5621
if (ckWARN (WARN_SYNTAX )) {
@@ -6288,7 +6318,7 @@ yyl_percent(pTHX_ char *s)
6288
6318
PREREF (PERLY_PERCENT_SIGN );
6289
6319
}
6290
6320
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6291
- && intuit_more (s , PL_bufend )) {
6321
+ && intuit_more (s , PL_bufend , FROM_PERCENT )) {
6292
6322
if (* s == '[' )
6293
6323
PL_tokenbuf [0 ] = '@' ;
6294
6324
}
@@ -6910,7 +6940,7 @@ yyl_snail(pTHX_ char *s)
6910
6940
if (PL_lex_state == LEX_NORMAL || PL_lex_brackets )
6911
6941
s = skipspace (s );
6912
6942
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6913
- && intuit_more (s , PL_bufend ))
6943
+ && intuit_more (s , PL_bufend , FROM_SNAIL ))
6914
6944
{
6915
6945
if (* s == '{' )
6916
6946
PL_tokenbuf [0 ] = '%' ;
@@ -9993,7 +10023,7 @@ Perl_yylex(pTHX)
9993
10023
return yylex ();
9994
10024
9995
10025
case LEX_INTERPENDMAYBE :
9996
- if (intuit_more (PL_bufptr , PL_bufend )) {
10026
+ if (intuit_more (PL_bufptr , PL_bufend , FROM_INTERDEPENDMAYBE )) {
9997
10027
PL_lex_state = LEX_INTERPNORMAL ; /* false alarm, more expr */
9998
10028
break ;
9999
10029
}
@@ -10807,7 +10837,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10807
10837
}
10808
10838
else if ( PL_lex_state == LEX_INTERPNORMAL
10809
10839
&& !PL_lex_brackets
10810
- && !intuit_more (s , PL_bufend ))
10840
+ && !intuit_more (s , PL_bufend , FROM_IDENT ))
10811
10841
PL_lex_state = LEX_INTERPEND ;
10812
10842
return s ;
10813
10843
}
0 commit comments