Skip to content

Commit

Permalink
Speed up fastscan package (#249)
Browse files Browse the repository at this point in the history
I looked into why the `fastscan` package was slower than I expected: I had
accidentally left code in the lexer that is entirely unused but is the
primary source of allocations and memory usage (and thus also bad for
latency, since allocations aren't free).

Once upon a time, in an older version of the lexer in protoparse (on
which this fastscan lexer is based), this code was used to capture the
actual raw text for a token. The new lexer in protocompile uses a
completely different approach in order to reduce memory usage, so the
code is completely unused here.

Removing it roughly doubles the throughput of `fastscan`, and it now uses
about one-third as many allocations and one-quarter as much memory.

```
-- before --
BenchmarkGoogleapisFastScan-10    	       5	 210397558 ns/op	468123099 B/op	11217259 allocs/op
BenchmarkGoogleapisFastScan-10    	       5	 202723933 ns/op	468156548 B/op	11217275 allocs/op
-- after --
BenchmarkGoogleapisFastScan-10    	      12	  93760795 ns/op	111257610 B/op	 3710000 allocs/op
BenchmarkGoogleapisFastScan-10    	      12	  94548743 ns/op	111259758 B/op	 3710010 allocs/op
```
  • Loading branch information
jhump authored Mar 7, 2024
1 parent f4c4a6f commit f62a9f6
Showing 1 changed file with 0 additions and 25 deletions.
25 changes: 0 additions & 25 deletions parser/fastscan/lexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,6 @@ func (t tokenType) describe() string {

type runeReader struct {
rr *bufio.Reader
marked []rune
unread []rune
err error
}
Expand All @@ -90,40 +89,19 @@ func (rr *runeReader) readRune() (r rune, err error) {
if len(rr.unread) > 0 {
r := rr.unread[len(rr.unread)-1]
rr.unread = rr.unread[:len(rr.unread)-1]
if rr.marked != nil {
rr.marked = append(rr.marked, r)
}
return r, nil
}
r, _, err = rr.rr.ReadRune()
if err != nil {
rr.err = err
} else if rr.marked != nil {
rr.marked = append(rr.marked, r)
}
return r, err
}

func (rr *runeReader) unreadRune(r rune) {
if rr.marked != nil {
if rr.marked[len(rr.marked)-1] != r {
panic("unread rune is not the same as last marked rune!")
}
rr.marked = rr.marked[:len(rr.marked)-1]
}
rr.unread = append(rr.unread, r)
}

func (rr *runeReader) startMark(initial rune) {
rr.marked = []rune{initial}
}

func (rr *runeReader) endMark() string {
m := string(rr.marked)
rr.marked = rr.marked[:0]
return m
}

type lexer struct {
input *runeReader
// start of the next rune in the input
Expand Down Expand Up @@ -161,8 +139,6 @@ func (l *lexer) adjustPos(c rune) {
}

func (l *lexer) Lex() (tokenType, any, error) {
l.input.endMark() // reset, just in case

for {
c, err := l.input.readRune()
if err == io.EOF {
Expand All @@ -181,7 +157,6 @@ func (l *lexer) Lex() (tokenType, any, error) {
continue
}

l.input.startMark(c)
l.prevTokenLine, l.prevTokenCol = l.curLine, l.curCol
l.adjustPos(c)
if c == '.' {
Expand Down

0 comments on commit f62a9f6

Please sign in to comment.