Skip to content

Commit e50eded

Browse files
authored
Improve error position reported by the lexer (#186)
This introduces two helper methods, `errorfAtPosition` and `panicfAtPosition`, which take an explicit start and end position, and uses them to report errors raised while tokenizing string-like literals and skipping comments.
1 parent e6a5352 commit e50eded

File tree

2 files changed

+55
-36
lines changed

2 files changed

+55
-36
lines changed

lexer.go

+32-17
Original file line numberDiff line numberDiff line change
@@ -418,7 +418,7 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s
418418
for l.peekOk(i) {
419419
if l.slice(i, i+len(q)) == q {
420420
if len(content) == 0 && name == "identifier" {
421-
l.panicf("invalid empty identifier")
421+
l.panicfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i+len(q)), "invalid empty identifier")
422422
}
423423
l.skipN(i + len(q))
424424
return string(content)
@@ -428,7 +428,7 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s
428428
if c == '\\' {
429429
i++
430430
if !l.peekOk(i) {
431-
l.panicf("invalid escape sequence: \\<eof>")
431+
l.panicfAtPosition(token.Pos(l.pos+i-1), token.Pos(l.pos+i), "invalid escape sequence: \\<eof>")
432432
}
433433

434434
c := l.peek(i)
@@ -457,65 +457,69 @@ func (l *Lexer) consumeQuotedContent(q string, raw, unicode bool, name string) s
457457
case '\\', '?', '"', '\'', '`':
458458
content = append(content, c)
459459
case 'x', 'X':
460-
if !(l.peekOk(i+1) && char.IsHexDigit(l.peek(i)) && char.IsHexDigit(l.peek(i+1))) {
461-
l.panicf("invalid escape sequence: hex escape sequence must be follwed by 2 hex digits")
460+
for j := 0; j < 2; j++ {
461+
if !(l.peekOk(i+j) && char.IsHexDigit(l.peek(i+j))) {
462+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits")
463+
}
462464
}
463465
u, err := strconv.ParseUint(l.slice(i, i+2), 16, 8)
464466
if err != nil {
465-
l.panicf("invalid escape sequence: %v", err)
467+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+2), "invalid escape sequence: %v", err)
466468
}
467469
content = append(content, byte(u))
468470
i += 2
469471
case 'u', 'U':
470472
if !unicode {
471-
l.panicf("invalid escape sequence: \\%c is not allowed in %s", c, name)
473+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i), "invalid escape sequence: \\%c is not allowed in %s", c, name)
472474
}
473475
size := 4
474476
if c == 'U' {
475477
size = 8
476478
}
477479
for j := 0; j < size; j++ {
478480
if !(l.peekOk(i+j) && char.IsHexDigit(l.peek(i+j))) {
479-
l.panicf("invalid escape sequence: \\%c must be followed by %d hex digits", c, size)
481+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: \\%c must be followed by %d hex digits", c, size)
480482
}
481483
}
482484
u, err := strconv.ParseUint(l.slice(i, i+size), 16, 32)
483485
if err != nil {
484-
l.panicf("invalid escape sequence: %v", err)
486+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+size), "invalid escape sequence: %v", err)
485487
}
486488
if 0xD800 <= u && u <= 0xDFFF || 0x10FFFF < u {
487-
l.panicf("invalid escape sequence: invalid code point: U+%04X", u)
489+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+size), "invalid escape sequence: invalid code point: U+%04X", u)
488490
}
489491
var buf [utf8.MaxRune]byte
490492
n := utf8.EncodeRune(buf[:], rune(u))
491493
content = append(content, buf[:n]...)
492494
i += size
493495
case '0', '1', '2', '3':
494-
if !(l.peekOk(i+1) && char.IsOctalDigit(l.peek(i)) && char.IsOctalDigit(l.peek(i+1))) {
495-
l.panicf("invalid escape sequence: octal escape sequence must be follwed by 3 octal digits")
496+
for j := 0; j < 2; j++ {
497+
if !(l.peekOk(i+j) && char.IsOctalDigit(l.peek(i+j))) {
498+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+j+1), "invalid escape sequence: octal escape sequence must be follwed by 3 octal digits")
499+
}
496500
}
497501
u, err := strconv.ParseUint(l.slice(i-1, i+2), 8, 8)
498502
if err != nil {
499-
l.panicf("invalid escape sequence: %v", err)
503+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i+2), "invalid escape sequence: %v", err)
500504
}
501505
content = append(content, byte(u))
502506
i += 2
503507
default:
504-
l.panicf("invalid escape sequence: \\%c", c)
508+
l.panicfAtPosition(token.Pos(l.pos+i-2), token.Pos(l.pos+i), "invalid escape sequence: \\%c", c)
505509
}
506510

507511
continue
508512
}
509513

510514
if c == '\n' && len(q) != 3 {
511-
l.panicf("unclosed %s: newline appears in non triple-quoted", name)
515+
l.panicfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i+1), "unclosed %s: newline appears in non triple-quoted", name)
512516
}
513517

514518
content = append(content, c)
515519
i++
516520
}
517521

518-
panic(l.errorf("unclosed %s", name))
522+
panic(l.errorfAtPosition(token.Pos(l.pos), token.Pos(l.pos+i), "unclosed %s", name))
519523
}
520524

521525
func (l *Lexer) skipSpaces() {
@@ -543,6 +547,7 @@ func (l *Lexer) skipComment() {
543547
}
544548

545549
func (l *Lexer) skipCommentUntil(end string, mustEnd bool) {
550+
pos := token.Pos(l.pos)
546551
for !l.eof() {
547552
if l.slice(0, len(end)) == end {
548553
l.skipN(len(end))
@@ -551,8 +556,7 @@ func (l *Lexer) skipCommentUntil(end string, mustEnd bool) {
551556
l.skip()
552557
}
553558
if mustEnd {
554-
// TODO: improve error position
555-
l.panicf("unclosed comment")
559+
l.panicfAtPosition(pos, token.Pos(l.pos), "unclosed comment")
556560
}
557561
}
558562

@@ -596,6 +600,17 @@ func (l *Lexer) errorf(msg string, param ...interface{}) *Error {
596600
}
597601
}
598602

603+
func (l *Lexer) errorfAtPosition(pos, end token.Pos, msg string, param ...interface{}) *Error {
604+
return &Error{
605+
Message: fmt.Sprintf(msg, param...),
606+
Position: l.Position(pos, end),
607+
}
608+
}
609+
599610
func (l *Lexer) panicf(msg string, param ...interface{}) {
600611
panic(l.errorf(msg, param...))
601612
}
613+
614+
func (l *Lexer) panicfAtPosition(pos, end token.Pos, msg string, param ...interface{}) {
615+
panic(l.errorfAtPosition(pos, end, msg, param...))
616+
}

lexer_test.go

+23-19
Original file line numberDiff line numberDiff line change
@@ -143,26 +143,27 @@ var lexerTestCases = []struct {
143143
var lexerWrongTestCase = []struct {
144144
source string
145145
pos Pos
146+
end Pos
146147
message string
147148
}{
148-
{"\b", 0, "illegal input character: '\\b'"},
149-
{`"foo`, 0, "unclosed string literal"},
150-
{`R"foo`, 1, "unclosed raw string literal"},
151-
{"'foo\n", 0, "unclosed string literal: newline appears in non triple-quoted"},
152-
{"R'foo\n", 1, "unclosed raw string literal: newline appears in non triple-quoted"},
153-
{"R'foo\\", 1, "invalid escape sequence: \\<eof>"},
154-
{`"\400"`, 0, "invalid escape sequence: \\4"},
155-
{`"\3xx"`, 0, "invalid escape sequence: octal escape sequence must be follwed by 3 octal digits"},
156-
{`"\xZZ"`, 0, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
157-
{`"\XZZ"`, 0, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
158-
{`B"\u0031"`, 1, "invalid escape sequence: \\u is not allowed in bytes literal"},
159-
{`B"\U00000031"`, 1, "invalid escape sequence: \\U is not allowed in bytes literal"},
160-
{`B"\U00000031"`, 1, "invalid escape sequence: \\U is not allowed in bytes literal"},
161-
{`"\UFFFFFFFF"`, 0, "invalid escape sequence: invalid code point: U+FFFFFFFF"},
162-
{"``", 0, "invalid empty identifier"},
163-
{"1from", 1, "number literal cannot follow identifier without any spaces"},
164-
{`'''0`, 0, "unclosed triple-quoted string literal"},
165-
{`/*`, 2, "unclosed comment"},
149+
{"\b", 0, 0, "illegal input character: '\\b'"},
150+
{`"foo`, 0, 4, "unclosed string literal"},
151+
{`R"foo`, 1, 5, "unclosed raw string literal"},
152+
{"'foo\n", 0, 5, "unclosed string literal: newline appears in non triple-quoted"},
153+
{"R'foo\n", 1, 6, "unclosed raw string literal: newline appears in non triple-quoted"},
154+
{"R'foo\\", 5, 6, "invalid escape sequence: \\<eof>"},
155+
{`"\400"`, 1, 3, "invalid escape sequence: \\4"},
156+
{`"\3xx"`, 1, 4, "invalid escape sequence: octal escape sequence must be follwed by 3 octal digits"},
157+
{`"\xZZ"`, 1, 4, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
158+
{`"\XZZ"`, 1, 4, "invalid escape sequence: hex escape sequence must be follwed by 2 hex digits"},
159+
{`B"\u0031"`, 2, 4, "invalid escape sequence: \\u is not allowed in bytes literal"},
160+
{`B"\U00000031"`, 2, 4, "invalid escape sequence: \\U is not allowed in bytes literal"},
161+
{`B"\U00000031"`, 2, 4, "invalid escape sequence: \\U is not allowed in bytes literal"},
162+
{`"\UFFFFFFFF"`, 1, 11, "invalid escape sequence: invalid code point: U+FFFFFFFF"},
163+
{"``", 0, 2, "invalid empty identifier"},
164+
{"1from", 1, 1, "number literal cannot follow identifier without any spaces"},
165+
{`'''0`, 0, 4, "unclosed triple-quoted string literal"},
166+
{`/*`, 0, 2, "unclosed comment"},
166167
}
167168

168169
func testLexer(t *testing.T, source string, tokens []*Token) {
@@ -240,7 +241,10 @@ func TestLexerWrong(t *testing.T) {
240241
t.Errorf("expected error message: %q, but: %q", tc.message, e.Message)
241242
}
242243
if e.Position.Pos != tc.pos {
243-
t.Errorf("expected error position: %v, but: %v", tc.pos, e.Position.Pos)
244+
t.Errorf("expected error position (pos): %v, but: %v", tc.pos, e.Position.Pos)
245+
}
246+
if e.Position.End != tc.end {
247+
t.Errorf("expected error position (end): %v, but: %v", tc.end, e.Position.End)
244248
}
245249
} else {
246250
t.Errorf("unexpected error: %v", err)

0 commit comments

Comments
 (0)