Skip to content

Commit 374801c

Browse files
committed
feat(fe): error on more confusables like ǃ and ﴾
Also remove some assumptions that certain symbols in expressions are only one byte.
1 parent 2990b04 commit 374801c

File tree

5 files changed

+206
-61
lines changed

5 files changed

+206
-61
lines changed

docs/CHANGELOG.md

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@ Semantic Versioning.
1313
* VS Code: You can now make quick-lint-js messages fun and insulting with the
1414
`quick-lint-js.snarky` setting (disabled by default). (Implemented by
1515
[vegerot][].)
16-
* Using Greek question mark (;, U+037E) instead of a semicolon (;, U+003B) now
17-
reports [E0457][] ("this is a Greek Question Mark, not a semicolon (';')").
16+
* Using Greek question mark (`;`, U+037E) instead of a semicolon (`;`, U+003B)
17+
now reports [E0457][] ("this is a Greek Question Mark, not a semicolon
18+
(';')"). This diagnostic is also reported for similar-looking characters like
19+
`ǃ` (which should be `!`) and `﴾` (which should be `(`).
1820
* TypeScript: Decorators on abstract classes are now parsed. ([#1194][])
1921

2022
### Fixed

src/quick-lint-js/fe/expression.h

Lines changed: 34 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -615,23 +615,19 @@ class Expression::Call final : public Expression {
615615
Source_Code_Span left_paren_span, const Char8 *span_end,
616616
std::optional<Source_Code_Span> optional_chaining_operator)
617617
: Expression(kind),
618-
call_left_paren_begin_(left_paren_span.begin()),
618+
call_left_paren_(left_paren_span),
619619
span_end_(span_end),
620620
children_(children),
621621
optional_chaining_operator_begin_(
622622
optional_chaining_operator.has_value()
623623
? optional_chaining_operator->begin()
624624
: nullptr) {
625-
QLJS_ASSERT(left_paren_span.size() == 1);
626625
if (optional_chaining_operator.has_value()) {
627626
QLJS_ASSERT(optional_chaining_operator->size() == 2);
628627
}
629628
}
630629

631-
Source_Code_Span left_paren_span() const {
632-
return Source_Code_Span(this->call_left_paren_begin_,
633-
this->call_left_paren_begin_ + 1);
634-
}
630+
Source_Code_Span left_paren_span() const { return this->call_left_paren_; }
635631

636632
std::optional<Source_Code_Span> optional_chaining_operator_span() const {
637633
if (this->optional_chaining_operator_begin_ == nullptr) {
@@ -641,7 +637,7 @@ class Expression::Call final : public Expression {
641637
this->optional_chaining_operator_begin_ + 2);
642638
}
643639

644-
const Char8 *call_left_paren_begin_;
640+
Source_Code_Span call_left_paren_;
645641
const Char8 *span_end_;
646642
Expression_Arena::Array_Ptr<Expression *> children_;
647643
const Char8 *optional_chaining_operator_begin_ = nullptr;
@@ -857,15 +853,11 @@ class Expression::Non_Null_Assertion final : public Expression {
857853
static constexpr Expression_Kind kind = Expression_Kind::Non_Null_Assertion;
858854

859855
explicit Non_Null_Assertion(Expression *child, Source_Code_Span bang_span)
860-
: Expression(kind), bang_end_(bang_span.end()), child_(child) {
861-
QLJS_ASSERT(same_pointers(this->bang_span(), bang_span));
862-
}
856+
: Expression(kind), bang_(bang_span), child_(child) {}
863857

864-
Source_Code_Span bang_span() const {
865-
return Source_Code_Span(this->bang_end_ - 1, this->bang_end_);
866-
}
858+
Source_Code_Span bang_span() const { return this->bang_; }
867859

868-
const Char8 *bang_end_;
860+
Source_Code_Span bang_;
869861
Expression *child_;
870862
};
871863
static_assert(Expression_Arena::is_allocatable<Expression::Non_Null_Assertion>);
@@ -895,16 +887,12 @@ class Expression::Optional final : public Expression {
895887
static constexpr Expression_Kind kind = Expression_Kind::Optional;
896888

897889
explicit Optional(Expression *child, Source_Code_Span question_span)
898-
: Expression(kind), child_(child), question_end_(question_span.end()) {
899-
QLJS_ASSERT(question_span.end() - question_span.begin() == 1);
900-
}
890+
: Expression(kind), child_(child), question_(question_span) {}
901891

902-
Source_Code_Span question_span() const {
903-
return Source_Code_Span(this->question_end_ - 1, this->question_end_);
904-
}
892+
Source_Code_Span question_span() const { return this->question_; }
905893

906894
Expression *child_;
907-
const Char8 *question_end_;
895+
Source_Code_Span question_;
908896
};
909897
static_assert(Expression_Arena::is_allocatable<Expression::Optional>);
910898

@@ -924,25 +912,26 @@ class Expression::Paren_Empty final : public Expression {
924912
public:
925913
static constexpr Expression_Kind kind = Expression_Kind::Paren_Empty;
926914

927-
explicit Paren_Empty(Source_Code_Span span) : Expression(kind), span_(span) {}
915+
explicit Paren_Empty(Source_Code_Span left_paren_span,
916+
Source_Code_Span right_paren_span)
917+
: Expression(kind),
918+
left_paren_(left_paren_span),
919+
right_paren_(right_paren_span) {}
928920

929-
Source_Code_Span left_paren_span() const {
930-
return Source_Code_Span(this->span_.begin(), this->span_.begin() + 1);
931-
}
921+
Source_Code_Span left_paren_span() const { return this->left_paren_; }
932922

933-
Source_Code_Span right_paren_span() const {
934-
return Source_Code_Span(this->span_.end() - 1, this->span_.end());
935-
}
923+
Source_Code_Span right_paren_span() const { return this->right_paren_; }
936924

937925
void report_missing_expression_error(Diag_Reporter *reporter) {
938926
reporter->report(Diag_Missing_Expression_Between_Parentheses{
939-
.left_paren_to_right_paren = this->span_,
927+
.left_paren_to_right_paren = this->span(),
940928
.left_paren = this->left_paren_span(),
941929
.right_paren = this->right_paren_span(),
942930
});
943931
}
944932

945-
Source_Code_Span span_;
933+
Source_Code_Span left_paren_;
934+
Source_Code_Span right_paren_;
946935
};
947936
static_assert(Expression_Arena::is_allocatable<Expression::Paren_Empty>);
948937

@@ -1072,16 +1061,12 @@ class Expression::Trailing_Comma final : public Expression {
10721061

10731062
explicit Trailing_Comma(Expression_Arena::Array_Ptr<Expression *> children,
10741063
Source_Code_Span comma_span)
1075-
: Expression(kind), children_(children), comma_end_(comma_span.end()) {
1076-
QLJS_ASSERT(comma_span.end() == comma_span.begin() + 1);
1077-
}
1064+
: Expression(kind), children_(children), comma_(comma_span) {}
10781065

1079-
Source_Code_Span comma_span() const {
1080-
return Source_Code_Span(this->comma_end_ - 1, this->comma_end_);
1081-
}
1066+
Source_Code_Span comma_span() const { return this->comma_; }
10821067

10831068
Expression_Arena::Array_Ptr<Expression *> children_;
1084-
const Char8 *comma_end_;
1069+
Source_Code_Span comma_;
10851070
};
10861071

10871072
class Expression::Type_Annotated final : public Expression {
@@ -1093,23 +1078,18 @@ class Expression::Type_Annotated final : public Expression {
10931078
const Char8 *span_end)
10941079
: Expression(kind),
10951080
child_(child),
1096-
colon_(colon_span.begin()),
1081+
colon_(colon_span),
10971082
type_visits_(std::move(type_visits)),
1098-
span_end_(span_end) {
1099-
QLJS_ASSERT(*colon_span.begin() == u8':');
1100-
QLJS_ASSERT(colon_span.size() == 1);
1101-
}
1083+
span_end_(span_end) {}
11021084

1103-
Source_Code_Span colon_span() const {
1104-
return Source_Code_Span(this->colon_, this->colon_ + 1);
1105-
}
1085+
Source_Code_Span colon_span() const { return this->colon_; }
11061086

11071087
void visit_type_annotation(Parse_Visitor_Base &v) {
11081088
std::move(this->type_visits_).move_into(v);
11091089
}
11101090

11111091
Expression *child_;
1112-
const Char8 *colon_;
1092+
Source_Code_Span colon_;
11131093
Buffering_Visitor type_visits_{nullptr};
11141094
const Char8 *span_end_;
11151095
};
@@ -1451,19 +1431,22 @@ inline Source_Code_Span Expression::span() const {
14511431
case Expression_Kind::Non_Null_Assertion: {
14521432
auto *assertion = expression_cast<const Non_Null_Assertion *>(this);
14531433
return Source_Code_Span(assertion->child_->span().begin(),
1454-
assertion->bang_end_);
1434+
assertion->bang_.end());
14551435
}
14561436
case Expression_Kind::Object:
14571437
return expression_cast<const Object *>(this)->span_;
14581438
case Expression_Kind::Optional: {
14591439
auto *optional = expression_cast<const Expression::Optional *>(this);
14601440
return Source_Code_Span(optional->child_->span().begin(),
1461-
optional->question_end_);
1441+
optional->question_.end());
14621442
}
14631443
case Expression_Kind::Paren:
14641444
return expression_cast<const Paren *>(this)->span_;
1465-
case Expression_Kind::Paren_Empty:
1466-
return expression_cast<const Paren_Empty *>(this)->span_;
1445+
case Expression_Kind::Paren_Empty: {
1446+
auto *paren_empty = expression_cast<const Paren_Empty *>(this);
1447+
return Source_Code_Span(paren_empty->left_paren_.begin(),
1448+
paren_empty->right_paren_.end());
1449+
}
14671450
case Expression_Kind::Private_Variable:
14681451
return expression_cast<const Private_Variable *>(this)
14691452
->variable_identifier_.span();
@@ -1489,7 +1472,7 @@ inline Source_Code_Span Expression::span() const {
14891472
case Expression_Kind::Trailing_Comma: {
14901473
auto *comma = expression_cast<const Trailing_Comma *>(this);
14911474
return Source_Code_Span(comma->children_.front()->span().begin(),
1492-
comma->comma_end_);
1475+
comma->comma_.end());
14931476
}
14941477
case Expression_Kind::Type_Annotated: {
14951478
auto *annotated = expression_cast<const Type_Annotated *>(this);

src/quick-lint-js/fe/lex.cpp

Lines changed: 102 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -93,16 +93,113 @@ constexpr char32_t right_double_quote = U'\u201d';
9393

9494
struct Confusable_Symbol {
9595
char32_t confusable;
96-
Char8 confusable_name[20];
96+
Char8 confusable_name[51];
9797
Char8 symbol;
98-
Char8 symbol_name[20];
98+
Char8 symbol_name[21];
9999
Token_Type symbol_token_type;
100100
};
101101

102102
Confusable_Symbol confusable_symbols[] = {
103-
{0x037e, u8"Greek Question Mark", u8';', u8"semicolon",
104-
Token_Type::semicolon},
105-
// TODO(strager): Add more.
103+
// clang-format off
104+
{ 0x037e, u8"Greek Question Mark", u8';', u8"semicolon", Token_Type::semicolon},
105+
106+
{ 0x02d0, u8"Modifier Letter Triangular Colon", u8':', u8"colon", Token_Type::colon},
107+
{ 0x02f8, u8"Modifier Letter Raised Colon", u8':', u8"colon", Token_Type::colon},
108+
{ 0x0589, u8"Armenian Full Stop", u8':', u8"colon", Token_Type::colon},
109+
{ 0x05c3, u8"Hebrew Punctuation Sof Pasuq", u8':', u8"colon", Token_Type::colon},
110+
{ 0x0703, u8"Syriac Supralinear Colon", u8':', u8"colon", Token_Type::colon},
111+
{ 0x0704, u8"Syriac Sublinear Colon", u8':', u8"colon", Token_Type::colon},
112+
{ 0x0903, u8"Devanagari Sign Visarga", u8':', u8"colon", Token_Type::colon},
113+
{ 0x0a83, u8"Gujarati Sign Visarga", u8':', u8"colon", Token_Type::colon},
114+
{ 0x16ec, u8"Runic Multiple Punctuation", u8':', u8"colon", Token_Type::colon},
115+
{ 0x1803, u8"Mongolian Full Stop", u8':', u8"colon", Token_Type::colon},
116+
{ 0x1809, u8"Mongolian Manchu Full Stop", u8':', u8"colon", Token_Type::colon},
117+
{ 0x205a, u8"Two Dot Punctuation", u8':', u8"colon", Token_Type::colon},
118+
{ 0x2236, u8"Ratio", u8':', u8"colon", Token_Type::colon},
119+
{ 0xa4fd, u8"Lisu Letter Tone Mya Jeu", u8':', u8"colon", Token_Type::colon},
120+
{ 0xa789, u8"Modifier Letter Colon", u8':', u8"colon", Token_Type::colon},
121+
{ 0xfe30, u8"Presentation Form For Vertical Two Dot Leader", u8':', u8"colon", Token_Type::colon},
122+
{ 0xff1a, u8"Fullwidth Colon", u8':', u8"colon", Token_Type::colon},
123+
124+
{ 0x00b8, u8"Cedilla", u8',', u8"comma", Token_Type::comma},
125+
{ 0x060d, u8"Arabic Date Separator", u8',', u8"comma", Token_Type::comma},
126+
{ 0x066b, u8"Arabic Decimal Separator", u8',', u8"comma", Token_Type::comma},
127+
{ 0x201a, u8"Single Low-9 Quotation Mark", u8',', u8"comma", Token_Type::comma},
128+
{ 0xa4f9, u8"Lisu Letter Tone Na Po", u8',', u8"comma", Token_Type::comma},
129+
130+
{ 0x01c3, u8"Latin Letter Retroflex Click", u8'!', u8"exclamation mark", Token_Type::bang},
131+
{ 0x2d51, u8"Tifinagh Letter Tuareg Yang", u8'!', u8"exclamation mark", Token_Type::bang},
132+
{ 0xff01, u8"Fullwidth Exclamation Mark", u8'!', u8"exclamation mark", Token_Type::bang},
133+
134+
// TODO(strager): Also match symbols like "․․․".
135+
{ 0x0660, u8"Arabic-Indic Digit Zero", u8'.', u8"dot", Token_Type::dot},
136+
{ 0x06f0, u8"Extended Arabic-Indic Digit Zero", u8'.', u8"dot", Token_Type::dot},
137+
{ 0x0701, u8"Syriac Supralinear Full Stop", u8'.', u8"dot", Token_Type::dot},
138+
{ 0x0702, u8"Syriac Sublinear Full Stop", u8'.', u8"dot", Token_Type::dot},
139+
{ 0x2024, u8"One Dot Leader", u8'.', u8"dot", Token_Type::dot},
140+
{ 0xa4f8, u8"Lisu Letter Tone Mya Ti", u8'.', u8"dot", Token_Type::dot},
141+
{ 0xa60e, u8"Vai Full Stop", u8'.', u8"dot", Token_Type::dot},
142+
{0x10a50, u8"Kharoshthi Punctuation Dot", u8'.', u8"dot", Token_Type::dot},
143+
{0x1d16d, u8"Musical Symbol Combining Augmentation Dot", u8'.', u8"dot", Token_Type::dot},
144+
145+
// NOTE(strager): We diverge from Unicode here. Unicode considers a few of these as parentheses.
// The suggested symbol must agree with symbol_name and symbol_token_type, so
// these entries use '[' and ']' (square brackets), not '(' and ')'.
146+
{ 0x2772, u8"Light Left Tortoise Shell Bracket Ornament", u8'[', u8"left square bracket", Token_Type::left_square},
147+
{ 0x2773, u8"Light Right Tortoise Shell Bracket Ornament", u8']', u8"right square bracket", Token_Type::right_square},
148+
{ 0x3014, u8"Left Tortoise Shell Bracket", u8'[', u8"left square bracket", Token_Type::left_square},
149+
{ 0x3015, u8"Right Tortoise Shell Bracket", u8']', u8"right square bracket", Token_Type::right_square},
150+
{ 0xff3b, u8"Fullwidth Left Square Bracket", u8'[', u8"left square bracket", Token_Type::left_square},
151+
{ 0xff3d, u8"Fullwidth Right Square Bracket", u8']', u8"right square bracket", Token_Type::right_square},
152+
153+
{ 0x2768, u8"Medium Left Parenthesis Ornament", u8'(', u8"left parenthesis", Token_Type::left_paren},
154+
{ 0x2769, u8"Medium Right Parenthesis Ornament", u8')', u8"right parenthesis", Token_Type::right_paren},
155+
{ 0xfd3e, u8"Ornate Left Parenthesis", u8'(', u8"left parenthesis", Token_Type::left_paren},
156+
{ 0xfd3f, u8"Ornate Right Parenthesis", u8')', u8"right parenthesis", Token_Type::right_paren},
157+
158+
{ 0x2774, u8"Medium Left Curly Bracket Ornament", u8'{', u8"left curly bracket", Token_Type::left_curly},
159+
{ 0x2775, u8"Medium Right Curly Bracket Ornament", u8'}', u8"right curly bracket", Token_Type::right_curly},
160+
{0x1d114, u8"Musical Symbol Brace", u8'{', u8"left curly bracket", Token_Type::left_curly},
161+
162+
// TODO(strager): Also match symbols like "ꝸ=" and "᐀᐀".
163+
// NOTE(strager): 0x0294 is legal in identifiers.
164+
{ 0x0294, u8"Latin Letter Glottal Stop", u8'?', u8"question mark", Token_Type::question},
165+
{ 0x0241, u8"Latin Capital Letter Glottal Stop", u8'?', u8"question mark", Token_Type::question},
166+
{ 0x097d, u8"Devanagari Letter Glottal Stop", u8'?', u8"question mark", Token_Type::question},
167+
{ 0x13ae, u8"Cherokee Letter He", u8'?', u8"question mark", Token_Type::question},
168+
{ 0xa6eb, u8"Bamum Letter Ntuu", u8'?', u8"question mark", Token_Type::question},
169+
170+
{ 0xa778, u8"Latin Small Letter Um", u8'&', u8"ampersand", Token_Type::ampersand},
171+
172+
{ 0x066d, u8"Arabic Five Pointed Star", u8'*', u8"asterisk", Token_Type::star},
173+
{ 0x204e, u8"Low Asterisk", u8'*', u8"asterisk", Token_Type::star},
174+
{ 0x2217, u8"Asterisk Operator", u8'*', u8"asterisk", Token_Type::star},
175+
{0x1031f, u8"Old Italic Letter Ess", u8'*', u8"asterisk", Token_Type::star},
176+
177+
{ 0x02c4, u8"Modifier Letter Up Arrowhead", u8'^', u8"circumflex", Token_Type::circumflex},
178+
{ 0x02c6, u8"Modifier Letter Circumflex Accent", u8'^', u8"circumflex", Token_Type::circumflex},
179+
180+
{ 0x02c2, u8"Modifier Letter Left Arrowhead", u8'<', u8"less than", Token_Type::less},
181+
{ 0x1438, u8"Canadian Syllabics Pa", u8'<', u8"less than", Token_Type::less},
182+
{ 0x16b2, u8"Runic Letter Kauna", u8'<', u8"less than", Token_Type::less},
183+
{ 0x2039, u8"Single Left-Pointing Angle Quotation Mark", u8'<', u8"less than", Token_Type::less},
184+
{ 0x276e, u8"Heavy Left-Pointing Angle Quotation Mark Ornament", u8'<', u8"less than", Token_Type::less},
185+
{0x1d236, u8"Greek Instrumental Notation Symbol-40", u8'<', u8"less than", Token_Type::less},
186+
187+
{ 0x02c3, u8"Modifier Letter Right Arrowhead", u8'>', u8"greater than", Token_Type::greater},
188+
{ 0x1433, u8"Canadian Syllabics Po", u8'>', u8"greater than", Token_Type::greater},
189+
{ 0x203a, u8"Single Right-Pointing Angle Quotation Mark", u8'>', u8"greater than", Token_Type::greater},
190+
{ 0x276f, u8"Heavy Right-Pointing Angle Quotation Mark Ornament", u8'>', u8"greater than", Token_Type::greater},
191+
{0x16f3f, u8"Miao Letter Archaic Zza", u8'>', u8"greater than", Token_Type::greater},
192+
{0x1d237, u8"Greek Instrumental Notation Symbol-42", u8'>', u8"greater than", Token_Type::greater},
193+
194+
{ 0x02dc, u8"Small Tilde", u8'~', u8"tilde", Token_Type::tilde},
195+
{ 0x1fc0, u8"Greek Perispomeni", u8'~', u8"tilde", Token_Type::tilde},
196+
{ 0x2053, u8"Swung Dash", u8'~', u8"tilde", Token_Type::tilde},
197+
{ 0x223c, u8"Tilde Operator", u8'~', u8"tilde", Token_Type::tilde},
198+
199+
{ 0x1400, u8"Canadian Syllabics Hyphen", u8'=', u8"equals", Token_Type::equal},
200+
{ 0x2e40, u8"Double Hyphen", u8'=', u8"equals", Token_Type::equal},
201+
{ 0x30a0, u8"Katakana-Hiragana Double Hyphen", u8'=', u8"equals", Token_Type::equal},
202+
{ 0xa4ff, u8"Lisu Punctuation Full Stop", u8'=', u8"equals", Token_Type::equal},
106203
};
107204

108205
bool look_up_in_unicode_table(const std::uint8_t* table, std::size_t table_size,

src/quick-lint-js/fe/parse-expression.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -563,8 +563,8 @@ Expression* Parser::parse_primary_expression(Parse_Visitor_Base& v,
563563
// () => {}
564564
Source_Code_Span right_paren_span = this->peek().span();
565565
this->skip();
566-
return this->make_expression<Expression::Paren_Empty>(
567-
Source_Code_Span(left_paren_span.begin(), right_paren_span.end()));
566+
return this->make_expression<Expression::Paren_Empty>(left_paren_span,
567+
right_paren_span);
568568
}
569569

570570
// (x) => {}
@@ -2581,7 +2581,7 @@ Expression* Parser::parse_arrow_function_expression_remainder(
25812581
paren_empty->report_missing_expression_error(this->diag_reporter_);
25822582
} else {
25832583
// () => {}
2584-
parameter_list_begin = paren_empty->span_.begin();
2584+
parameter_list_begin = paren_empty->left_paren_.begin();
25852585
}
25862586
break;
25872587
}

0 commit comments

Comments
 (0)