Skip to content

Commit

Permalink
Scanner refactor (#8)
Browse files Browse the repository at this point in the history
* Refactor parser and scanner for cleaner code

Significant changes include:
- Simplified the NewParser function in parser.go by removing the manual scanner-priming step and instead initializing the parser's token state with a single next() call.
- Refactored the Scan function in scanner.go to return position, token, and literal directly instead of setting them as properties on the Scanner struct.
- Updated tests in scanner_test.go to reflect these changes.

* Enhanced error handling in parser and scanner

The parser's error matching has been improved to check for substrings, providing more flexibility. The scanner now advances to the next token after identifying an identifier, ensuring proper sequence progression. Adjustments were also made to the scanner tests: literal comparison no longer trims spaces from input, and position calculation now accounts for an extra character. Lastly, a change was made in how scanning errors are handled during testing.

* Added PositionFor function and corresponding tests

A new function, PositionFor, has been added to the token package. This function returns the Position value for a given file position. If the provided position is out of bounds, it's adjusted to match File.Offset behavior. Alongside this addition, a test suite has also been created to ensure that calling PositionFor yields equivalent results as calling file.PositionFor(p, false).

* Refactor scanner initialization and add position method

The scanner's initialization has been refactored for better readability. The creation of the bufio.Scanner and setting its split function is now done before creating the Scanner struct. A new method, Position, has been added to the Scanner which returns a token's position in a file.

In addition, changes have been made to the scanner tests. The test case for scanning an identifier followed by a token has been replaced with one for scanning an identifier followed by whitespace. Additional assertions have also been added to ensure that after scanning an identifier or special character, the next scan returns EOF (end of file).

* Enhanced scanner test coverage

Added new tests to the scanner_test.go file. These include a test for the Position function, ensuring it's equivalent to calling file.PositionFor(p, false). Also added checks for token position and newline tokens in existing tests. Corrected an issue with scanning newline followed by token.

* Refactored scanner logic and expanded tests

The scanning logic has been refactored for better readability and efficiency. The 'done' flag is now updated after the offset has been advanced, so that when a newline character is encountered the new line is recorded at the correct offset.

In addition, the test coverage for the scanner functionality has been significantly expanded. New test cases have been added to verify correct token scanning following a newline character, with various input scenarios considered. This ensures robustness of the scanner across different use-cases.

* Refactor scanner tests for readability

Significant changes include:
- Removed redundant test cases in the scanner_test.go file
- Simplified token scanning tests by focusing on space-separated tokens only
- This results in a cleaner, more maintainable test suite
  • Loading branch information
UnstoppableMango authored Jan 18, 2025
1 parent bda6241 commit b378b8d
Show file tree
Hide file tree
Showing 6 changed files with 299 additions and 247 deletions.
21 changes: 7 additions & 14 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,13 @@ type Parser struct {
}

func NewParser(r io.Reader, file *token.File) *Parser {
s := NewScanner(r, file)
s.Scan() // TODO: Cleaner priming

return &Parser{
s: s,
file: file, // TODO: Same file? Different file?
tok: s.Token(),
lit: s.Literal(),
p := &Parser{
s: NewScanner(r, file),
file: file,
}
p.next()

return p
}

func (p *Parser) ParseFile() (*ast.File, error) {
Expand Down Expand Up @@ -56,12 +54,7 @@ func (p *Parser) error(pos token.Pos, msg string) {
}

func (p *Parser) next() {
if p.s.Scan() {
// TODO: p.pos
p.tok, p.lit = p.s.Token(), p.s.Literal()
} else {
p.tok = token.EOF
}
p.pos, p.tok, p.lit = p.s.Scan()
}

func (p *Parser) parseFile() *ast.File {
Expand Down
4 changes: 3 additions & 1 deletion parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ var _ = Describe("Parser", func() {

_, err := p.ParseFile()

Expect(err).To(MatchError("expected 'IDENT'"))
Expect(err).To(MatchError(
ContainSubstring("expected 'IDENT'"),
))
})
})
86 changes: 42 additions & 44 deletions scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ type Scanner struct {
}

func NewScanner(r io.Reader, file *token.File) *Scanner {
scanner := bufio.NewScanner(r)
scanner.Split(ScanTokens)

s := &Scanner{
s: bufio.NewScanner(r),
s: scanner,
file: file,
}
s.s.Split(ScanTokens)
s.next()

return s
Expand All @@ -35,97 +37,93 @@ func (s Scanner) Err() error {
return s.s.Err()
}

func (s Scanner) Token() token.Token {
return s.tok
}

func (s Scanner) Literal() string {
return s.lit
func (s Scanner) Position(pos token.Pos) token.Position {
return token.PositionFor(s.file, pos)
}

func (s Scanner) Pos() token.Pos {
return s.file.Pos(s.offset)
}

func (s *Scanner) Scan() bool {
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
if s.done {
s.tok = token.EOF
return false
pos = s.file.Pos(s.offset)
tok = token.EOF
return
}

s.skipWhitespace()

// current token start
pos = s.file.Pos(s.offset)
var atNewline bool

s.skipWhitespace()
switch txt := s.s.Text(); {
case token.IsIdentifier(txt):
s.lit = txt
lit = txt
s.next()
if len(txt) > 1 {
s.tok = token.Lookup(txt)
tok = token.Lookup(txt)
} else {
s.tok = token.IDENT
tok = token.IDENT
}
default:
s.next()
switch txt {
case "=":
s.tok = token.RECURSIVE_ASSIGN
tok = token.RECURSIVE_ASSIGN
case ":=":
s.tok = token.SIMPLE_ASSIGN
tok = token.SIMPLE_ASSIGN
case "::=":
s.tok = token.POSIX_ASSIGN
tok = token.POSIX_ASSIGN
case ":::=":
s.tok = token.IMMEDIATE_ASSIGN
tok = token.IMMEDIATE_ASSIGN
case "?=":
s.tok = token.IFNDEF_ASSIGN
tok = token.IFNDEF_ASSIGN
case "!=":
s.tok = token.SHELL_ASSIGN
tok = token.SHELL_ASSIGN
case ",":
s.tok = token.COMMA
tok = token.COMMA
case "\n":
atNewline = true
s.tok = token.NEWLINE
tok = token.NEWLINE
case "\t":
s.tok = token.TAB
tok = token.TAB
case "(":
s.tok = token.LPAREN
tok = token.LPAREN
case ")":
s.tok = token.RPAREN
tok = token.RPAREN
case "{":
s.tok = token.LBRACE
tok = token.LBRACE
case "}":
s.tok = token.RBRACE
tok = token.RBRACE
case "$":
s.tok = token.DOLLAR
tok = token.DOLLAR
case ":":
s.tok = token.COLON
tok = token.COLON
case ";":
s.tok = token.SEMI
tok = token.SEMI
case "|":
s.tok = token.PIPE
tok = token.PIPE
case "#":
// TODO
// s.lit = s.scanComment()
s.tok = token.COMMENT
tok = token.COMMENT
default:
s.tok = token.UNSUPPORTED
tok = token.UNSUPPORTED
s.lit = txt
}
}

s.next()
if atNewline && s.done {
s.tok = token.EOF
return false
} else {
return true
tok = token.EOF
}

return
}

func (s *Scanner) next() {
s.done = !s.s.Scan()
s.offset = s.rdOffset
if bytes.ContainsRune(s.s.Bytes(), '\n') {
s.file.AddLine(s.offset)
}
s.done = !s.s.Scan()
s.rdOffset += len(s.s.Bytes())
}

Expand Down
Loading

0 comments on commit b378b8d

Please sign in to comment.