Skip to content

Commit 577d068

Browse files
triallax authored and sharkdp committed
Support subscript characters in identifiers
1 parent 10ab90a commit 577d068

File tree

2 files changed

+18
-0
lines changed

2 files changed

+18
-0
lines changed

numbat/src/parser.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,6 +1687,7 @@ mod tests {
16871687
parse_as_expression(&["foo_bar"], identifier!("foo_bar"));
16881688
parse_as_expression(&["MeineSchöneVariable"], identifier!("MeineSchöneVariable"));
16891689
parse_as_expression(&["°"], identifier!("°"));
1690+
parse_as_expression(&["Mass_H₂O"], identifier!("Mass_H₂O"));
16901691
}
16911692

16921693
#[test]

numbat/src/tokenizer.rs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,13 @@ fn is_other_allowed_identifier_char(c: char) -> bool {
171171
c == '%'
172172
}
173173

174+
/// Returns `true` if `c` is a Unicode subscript character.
///
/// Subscripts occupy U+2080..=U+209C inside the "Superscripts and Subscripts"
/// block: the digits `₀`–`₉`, the signs `₊ ₋ ₌ ₍ ₎`, and the small Latin
/// letters `ₐ`–`ₜ`.
fn is_subscript_char(c: char) -> bool {
    // See https://en.wikipedia.org/wiki/Unicode_subscripts_and_superscripts#Superscripts_and_subscripts_block
    //
    // The upper bound must be U+209C (the last assigned subscript, `ₜ`). A wider
    // range would also match unrelated symbols such as `€`, `→` or `−`.
    ('\u{2080}'..='\u{209C}').contains(&c)
}
180+
174181
fn is_identifier_start(c: char) -> bool {
175182
unicode_ident::is_xid_start(c)
176183
|| is_numerical_fraction_char(c)
@@ -182,6 +189,7 @@ fn is_identifier_start(c: char) -> bool {
182189

183190
fn is_identifier_continue(c: char) -> bool {
184191
(unicode_ident::is_xid_continue(c)
192+
|| is_subscript_char(c)
185193
|| is_currency_char(c)
186194
|| is_other_allowed_identifier_char(c))
187195
&& !is_exponent_char(c)
@@ -1049,3 +1057,12 @@ fn test_is_currency_char() {
10491057

10501058
assert!(!is_currency_char('E'));
10511059
}
1060+
1061+
#[test]
fn test_is_subscript_char() {
    // Subscript digits, letters and punctuation are all recognized.
    assert!(is_subscript_char('₅'));
    assert!(is_subscript_char('₁'));
    assert!(is_subscript_char('ₓ'));
    assert!(is_subscript_char('ₘ'));
    assert!(is_subscript_char('₎'));

    // Their regular and superscript counterparts are not subscripts.
    assert!(!is_subscript_char('5'));
    assert!(!is_subscript_char('x'));
    assert!(!is_subscript_char(')'));
    assert!(!is_subscript_char('⁵'));
}

0 commit comments

Comments
 (0)