Skip to content

Commit b8d6e44

Browse files
fix: liberalize rules for atoms by allowing leading '' (#6114)
This PR liberalizes atom rules by allowing `''` to be a prefix of an atom, after #6012 only added an exception for `''` alone, and also adds some unit tests for atom validation.
1 parent 5a99cb3 commit b8d6e44

File tree

2 files changed

+140
-1
lines changed

2 files changed

+140
-1
lines changed

src/Lean/Elab/Syntax.lean

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,8 +236,9 @@ where
236236
-- Pretty-printing instructions shouldn't affect validity
237237
let s := s.trim
238238
!s.isEmpty &&
239-
(s.front != '\'' || s == "''") &&
239+
(s.front != '\'' || "''".isPrefixOf s) &&
240240
s.front != '\"' &&
241+
!(isIdBeginEscape s.front) &&
241242
!(s.front == '`' && (s.endPos == ⟨1⟩ || isIdFirst (s.get ⟨1⟩) || isIdBeginEscape (s.get ⟨1⟩))) &&
242243
!s.front.isDigit &&
243244
!(s.any Char.isWhitespace)

tests/lean/run/atomValidation.lean

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,138 @@
1+
import Lean.Elab.Command
2+
import Lean.Elab.Syntax
3+
4+
open Lean.Elab.Term.toParserDescr (isValidAtom)
5+
open Lean Elab Command
6+
7+
/-!
8+
Test various classes of atoms that should be allowed or not.
9+
-/
10+
11+
def test (expected : Bool) (strings : List String) : CommandElabM Unit := Id.run do
12+
let mut wrong : List String := []
13+
for s in strings do
14+
if isValidAtom s != expected then
15+
wrong := s :: wrong
16+
if isValidAtom (" " ++ s) != expected then
17+
wrong := s!"{s} (with leading whitespace)" :: wrong
18+
if isValidAtom (s ++ " ") != expected then
19+
wrong := s!"{s} (with trailing whitespace)" :: wrong
20+
if isValidAtom (" " ++ s ++ " ") != expected then
21+
wrong := s!"{s} (with leading and trailing whitespace)" :: wrong
22+
23+
if wrong.isEmpty then
24+
logInfo <| "All " ++ if expected then "accepted" else "rejected"
25+
else
26+
logError <|
27+
s!"The following atoms should be {if expected then "" else "in"}valid but are not:\n" ++
28+
String.join (wrong.reverse.map (s! " * {·}\n"))
29+
30+
31+
syntax "#test_valid" term : command
32+
syntax "#test_invalid" term : command
33+
34+
macro_rules
35+
| `(#test_valid%$tok $t) => `(#eval%$tok test true $t)
36+
| `(#test_invalid%$tok $t) => `(#eval%$tok test false $t)
37+
38+
39+
/-!
40+
# No Empty Atoms
41+
-/
42+
43+
/-- info: All rejected -/
44+
#guard_msgs in
45+
#test_invalid [""]
46+
47+
48+
/-!
49+
# Preventing Character Literal Confusion
50+
51+
Atoms are required to be suitably unlike character literals. This means that if they start with a
52+
single quote, the next character must also be a single quote.
53+
54+
Two single quotes (and variations on it) has long-term usage as an infix operator in Mathlib.
55+
-/
56+
57+
/-- info: All accepted -/
58+
#guard_msgs in
59+
#test_valid ["if", "''", "''ᵁ", "if'", "x'y'z", "x''y"]
60+
61+
/-- info: All rejected -/
62+
#guard_msgs in
63+
#test_invalid ["'x'", "'ᵁ", "'c", "'no'", "'"]
64+
65+
66+
/-!
67+
# No Internal Whitespace
68+
-/
69+
70+
/-- info: All rejected -/
71+
#guard_msgs in
72+
#test_invalid ["open mixed", "open mixed"]
73+
74+
75+
/-!
76+
# No Confusion with String Literals
77+
78+
Internal double quotes are allowed.
79+
-/
80+
81+
/-- info: All accepted -/
82+
#guard_msgs in
83+
#test_valid ["what\"string\"is_this?"]
84+
85+
/-- info: All rejected -/
86+
#guard_msgs in
87+
#test_invalid ["\"","\"\"", "\"f\""]
88+
89+
/-!
90+
# No Confusion with Escaped Identifiers
91+
92+
This doesn't confuse the parser, but it may confuse a user if they can define an atom that can never
93+
be parsed.
94+
-/
95+
96+
/-- info: All accepted -/
97+
#guard_msgs in
98+
#test_valid ["prefix«", "prefix«test", "prefix«test»", "prefix«test»foo"]
99+
100+
/-- info: All rejected -/
101+
#guard_msgs in
102+
#test_invalid ["«", "«test", "«test»", "«test»foo"]
103+
104+
105+
/-!
106+
# No Confusion with Name Literals
107+
108+
The first two characters of an atom may not be a valid start of a name literal
109+
-/
110+
111+
/-- info: All accepted -/
112+
#guard_msgs in
113+
#test_valid ["``", "`!", "x`"]
114+
115+
/-!
116+
The next set all begin with U0x2035, REVERSED PRIME, rather than back-tick, and are thus accepted.
117+
-/
118+
/-- info: All accepted -/
119+
#guard_msgs in
120+
#test_valid ["‵", "‵x", "‵«", "‵xyz", "‵«x.y", "‵«x.y.z»"]
121+
122+
123+
/-- info: All rejected -/
124+
#guard_msgs in
125+
#test_invalid ["`", "`x", "`«", "`xyz", "`«x.y", "`«x.y.z»"]
126+
127+
128+
/-!
129+
# No Leading Digits
130+
-/
131+
132+
/-- info: All accepted -/
133+
#guard_msgs in
134+
#test_valid ["prefix5", "prefix22test", "prefix1test0", "prefix8test8foo"]
135+
136+
/-- info: All rejected -/
137+
#guard_msgs in
138+
#test_invalid ["0", "1test", "0test3"]

0 commit comments

Comments
 (0)