@@ -101,6 +101,12 @@ static const char ident_var_zero_multi_digit[] = "Numeric variables with more th
101
101
#define XFAKEEOF 0x40
102
102
#define XFAKEBRACK 0x80
103
103
104
+ #define FROM_DOLLAR 1
105
+ #define FROM_SNAIL 2
106
+ #define FROM_PERCENT 3
107
+ #define FROM_IDENT 4
108
+ #define FROM_INTERDEPENDMAYBE 5
109
+
104
110
#ifdef USE_UTF8_SCRIPTS
105
111
# define UTF cBOOL(!IN_BYTES)
106
112
#else
@@ -4517,7 +4523,11 @@ S_is_existing_identifier(pTHX_ char *s, Size_t len, char sigil, bool is_utf8)
4517
4523
/* This is the one truly awful dwimmer necessary to conflate C and sed. */
4518
4524
4519
4525
STATIC int
4520
- S_intuit_more (pTHX_ char * s , char * e )
4526
+ S_intuit_more (pTHX_ char * s , char * e ,
4527
+ U8 caller_context , /* Who's calling us? basically an enum */
4528
+ char * caller_s , /* If non-NULL, the name of the identifier
4529
+ that resulted in this call */
4530
+ Size_t caller_length ) /* And the length of that name */
4521
4531
{
4522
4532
PERL_ARGS_ASSERT_INTUIT_MORE ;
4523
4533
@@ -4581,6 +4591,49 @@ S_intuit_more(pTHX_ char *s, char *e)
4581
4591
if (s [0 ] == ']' || s [0 ] == '^' )
4582
4592
return FALSE;
4583
4593
4594
+ bool under_strict_vars = PL_hints & HINT_STRICT_VARS ;
4595
+
4596
+ /* If the input is of the form '$foo[...', and there is a $foo scalar and
4597
+ * no @foo array, then '...' is more likely to be a character class.
4598
+ * (Under 'strict vars', we know at compile time all the accessible
4599
+ * variables, so in that case it MUST be a character class.) If the
4600
+ * situation is reversed, it is more likely to be (or must be) a
4601
+ * subscript. */
4602
+ if (caller_context == FROM_DOLLAR ) {
4603
+ assert (caller_s );
4604
+
4605
+ /* See if there is a known scalar for the input identifier */
4606
+ bool has_scalar = is_existing_identifier (caller_s , caller_length ,
4607
+ '$' , UTF );
4608
+
4609
+ /* Repeat to see if there is a known array of the given name */
4610
+ bool has_array = is_existing_identifier (caller_s , caller_length ,
4611
+ '@' , UTF );
4612
+
4613
+ unsigned int count = has_scalar + has_array ;
4614
+
4615
+ /* Under strict, we need some variable to be declared. */
4616
+ if (under_strict_vars ) {
4617
+
4618
+ /* If none are, is an error. Return false to stop useless further
4619
+ * parsing. */
4620
+ if (count == 0 ) {
4621
+ return false;
4622
+ }
4623
+
4624
+ /* When just one variable is declared, the construct has to match
4625
+ * what the variable is. If it is an array, this must be a
4626
+ * subscript which needs further processing; otherwise it is a
4627
+ * character class needing nothing further. */
4628
+ if (count == 1 ) {
4629
+ return has_array ;
4630
+ }
4631
+
4632
+ /* Here have both an array and a scalar with the same name. Drop
4633
+ * down to use the heuristics to try to intuit which is meant */
4634
+ }
4635
+ }
4636
+
4584
4637
/* Find matching ']'. khw: This means any s[1] below is guaranteed to
4585
4638
* exist */
4586
4639
const char * const send = (char * ) memchr (s , ']' , e - s );
@@ -5413,7 +5466,9 @@ yyl_dollar(pTHX_ char *s)
5413
5466
s = skipspace (s );
5414
5467
5415
5468
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
5416
- && intuit_more (s , PL_bufend )) {
5469
+ && intuit_more (s , PL_bufend , FROM_DOLLAR ,
5470
+ PL_tokenbuf , strlen (PL_tokenbuf )))
5471
+ {
5417
5472
if (* s == '[' ) {
5418
5473
PL_tokenbuf [0 ] = '@' ;
5419
5474
if (ckWARN (WARN_SYNTAX )) {
@@ -6117,7 +6172,9 @@ yyl_percent(pTHX_ char *s)
6117
6172
PREREF (PERLY_PERCENT_SIGN );
6118
6173
}
6119
6174
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6120
- && intuit_more (s , PL_bufend )) {
6175
+ && intuit_more (s , PL_bufend , FROM_PERCENT ,
6176
+ PL_tokenbuf , strlen (PL_tokenbuf )))
6177
+ {
6121
6178
if (* s == '[' )
6122
6179
PL_tokenbuf [0 ] = '@' ;
6123
6180
}
@@ -6739,7 +6796,8 @@ yyl_snail(pTHX_ char *s)
6739
6796
if (PL_lex_state == LEX_NORMAL || PL_lex_brackets )
6740
6797
s = skipspace (s );
6741
6798
if ( (PL_expect != XREF || PL_oldoldbufptr == PL_last_lop )
6742
- && intuit_more (s , PL_bufend ))
6799
+ && intuit_more (s , PL_bufend , FROM_SNAIL ,
6800
+ PL_tokenbuf , strlen (PL_tokenbuf )))
6743
6801
{
6744
6802
if (* s == '{' )
6745
6803
PL_tokenbuf [0 ] = '%' ;
@@ -9822,7 +9880,8 @@ Perl_yylex(pTHX)
9822
9880
return yylex ();
9823
9881
9824
9882
case LEX_INTERPENDMAYBE :
9825
- if (intuit_more (PL_bufptr , PL_bufend )) {
9883
+ if (intuit_more (PL_bufptr , PL_bufend , FROM_INTERDEPENDMAYBE , NULL , 0 ))
9884
+ {
9826
9885
PL_lex_state = LEX_INTERPNORMAL ; /* false alarm, more expr */
9827
9886
break ;
9828
9887
}
@@ -10636,7 +10695,7 @@ S_scan_ident(pTHX_ char *s, char *dest, STRLEN destlen, I32 ck_uni)
10636
10695
}
10637
10696
else if ( PL_lex_state == LEX_INTERPNORMAL
10638
10697
&& !PL_lex_brackets
10639
- && !intuit_more (s , PL_bufend ))
10698
+ && !intuit_more (s , PL_bufend , FROM_IDENT , NULL , 0 ))
10640
10699
PL_lex_state = LEX_INTERPEND ;
10641
10700
return s ;
10642
10701
}
0 commit comments