Skip to content

Commit

Permalink
Scanner refactor (#8)
Browse files Browse the repository at this point in the history
* Refactor parser and scanner for cleaner code

Significant changes include:
- Simplified the NewParser function in parser.go by removing the manual scanner-priming step and instead initializing the parser's token state with a single next() call.
- Refactored the Scan function in scanner.go to return position, token, and literal directly instead of setting them as properties on the Scanner struct.
- Updated tests in scanner_test.go to reflect these changes.

* Enhanced error handling in parser and scanner

The parser's error matching has been improved to check for substrings, providing more flexibility. The scanner now advances to the next token after identifying an identifier, ensuring proper sequence progression. Adjustments were also made to the scanner tests: literal comparison no longer trims spaces from input, and position calculation now accounts for an extra character. Lastly, a change was made in how scanning errors are handled during testing.

* Added PositionFor function and corresponding tests

A new function, PositionFor, has been added to the token package. This function returns the Position value for a given file position. If the provided position is out of bounds, it's adjusted to match File.Offset behavior. Alongside this addition, a test suite has also been created to ensure that calling PositionFor yields equivalent results as calling file.PositionFor(p, false).

* Refactor scanner initialization and add position method

The scanner's initialization has been refactored for better readability. The creation of the bufio.Scanner and setting its split function is now done before creating the Scanner struct. A new method, Position, has been added to the Scanner which returns a token's position in a file.

In addition, changes have been made to the scanner tests. The test case for scanning an identifier followed by a token has been replaced with one for scanning an identifier followed by whitespace. Additional assertions have also been added to ensure that after scanning an identifier or special character, the next scan returns EOF (end of file).

* Enhanced scanner test coverage

Added new tests to the scanner_test.go file. These include a test for the Position function, ensuring it's equivalent to calling file.PositionFor(p, false). Also added checks for token position and newline tokens in existing tests. Corrected an issue with scanning newline followed by token.

* Refactored scanner logic and expanded tests

The scanning logic has been refactored for better readability and efficiency. The 'done' flag is now updated after the offset has been advanced, so that when a newline character is encountered the new line is recorded at the correct offset.

In addition, the test coverage for the scanner functionality has been significantly expanded. New test cases have been added to verify correct token scanning following a newline character, with various input scenarios considered. This ensures robustness of the scanner across different use-cases.

* Refactor scanner tests for readability

Significant changes include:
- Removed redundant test cases in the scanner_test.go file
- Simplified token scanning tests by focusing on space-separated tokens only
- This results in a cleaner, more maintainable test suite
  • Loading branch information
UnstoppableMango authored Jan 18, 2025
1 parent bda6241 commit b378b8d
Show file tree
Hide file tree
Showing 6 changed files with 299 additions and 247 deletions.
21 changes: 7 additions & 14 deletions parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,13 @@ type Parser struct {
}

func NewParser(r io.Reader, file *token.File) *Parser {
s := NewScanner(r, file)
s.Scan() // TODO: Cleaner priming

return &Parser{
s: s,
file: file, // TODO: Same file? Different file?
tok: s.Token(),
lit: s.Literal(),
p := &Parser{
s: NewScanner(r, file),
file: file,
}
p.next()

return p
}

func (p *Parser) ParseFile() (*ast.File, error) {
Expand Down Expand Up @@ -56,12 +54,7 @@ func (p *Parser) error(pos token.Pos, msg string) {
}

func (p *Parser) next() {
if p.s.Scan() {
// TODO: p.pos
p.tok, p.lit = p.s.Token(), p.s.Literal()
} else {
p.tok = token.EOF
}
p.pos, p.tok, p.lit = p.s.Scan()
}

func (p *Parser) parseFile() *ast.File {
Expand Down
4 changes: 3 additions & 1 deletion parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ var _ = Describe("Parser", func() {

_, err := p.ParseFile()

Expect(err).To(MatchError("expected 'IDENT'"))
Expect(err).To(MatchError(
ContainSubstring("expected 'IDENT'"),
))
})
})
86 changes: 42 additions & 44 deletions scanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,13 @@ type Scanner struct {
}

func NewScanner(r io.Reader, file *token.File) *Scanner {
scanner := bufio.NewScanner(r)
scanner.Split(ScanTokens)

s := &Scanner{
s: bufio.NewScanner(r),
s: scanner,
file: file,
}
s.s.Split(ScanTokens)
s.next()

return s
Expand All @@ -35,97 +37,93 @@ func (s Scanner) Err() error {
return s.s.Err()
}

func (s Scanner) Token() token.Token {
return s.tok
}

func (s Scanner) Literal() string {
return s.lit
func (s Scanner) Position(pos token.Pos) token.Position {
return token.PositionFor(s.file, pos)
}

func (s Scanner) Pos() token.Pos {
return s.file.Pos(s.offset)
}

func (s *Scanner) Scan() bool {
func (s *Scanner) Scan() (pos token.Pos, tok token.Token, lit string) {
if s.done {
s.tok = token.EOF
return false
pos = s.file.Pos(s.offset)
tok = token.EOF
return
}

s.skipWhitespace()

// current token start
pos = s.file.Pos(s.offset)
var atNewline bool

s.skipWhitespace()
switch txt := s.s.Text(); {
case token.IsIdentifier(txt):
s.lit = txt
lit = txt
s.next()
if len(txt) > 1 {
s.tok = token.Lookup(txt)
tok = token.Lookup(txt)
} else {
s.tok = token.IDENT
tok = token.IDENT
}
default:
s.next()
switch txt {
case "=":
s.tok = token.RECURSIVE_ASSIGN
tok = token.RECURSIVE_ASSIGN
case ":=":
s.tok = token.SIMPLE_ASSIGN
tok = token.SIMPLE_ASSIGN
case "::=":
s.tok = token.POSIX_ASSIGN
tok = token.POSIX_ASSIGN
case ":::=":
s.tok = token.IMMEDIATE_ASSIGN
tok = token.IMMEDIATE_ASSIGN
case "?=":
s.tok = token.IFNDEF_ASSIGN
tok = token.IFNDEF_ASSIGN
case "!=":
s.tok = token.SHELL_ASSIGN
tok = token.SHELL_ASSIGN
case ",":
s.tok = token.COMMA
tok = token.COMMA
case "\n":
atNewline = true
s.tok = token.NEWLINE
tok = token.NEWLINE
case "\t":
s.tok = token.TAB
tok = token.TAB
case "(":
s.tok = token.LPAREN
tok = token.LPAREN
case ")":
s.tok = token.RPAREN
tok = token.RPAREN
case "{":
s.tok = token.LBRACE
tok = token.LBRACE
case "}":
s.tok = token.RBRACE
tok = token.RBRACE
case "$":
s.tok = token.DOLLAR
tok = token.DOLLAR
case ":":
s.tok = token.COLON
tok = token.COLON
case ";":
s.tok = token.SEMI
tok = token.SEMI
case "|":
s.tok = token.PIPE
tok = token.PIPE
case "#":
// TODO
// s.lit = s.scanComment()
s.tok = token.COMMENT
tok = token.COMMENT
default:
s.tok = token.UNSUPPORTED
tok = token.UNSUPPORTED
s.lit = txt
}
}

s.next()
if atNewline && s.done {
s.tok = token.EOF
return false
} else {
return true
tok = token.EOF
}

return
}

func (s *Scanner) next() {
s.done = !s.s.Scan()
s.offset = s.rdOffset
if bytes.ContainsRune(s.s.Bytes(), '\n') {
s.file.AddLine(s.offset)
}
s.done = !s.s.Scan()
s.rdOffset += len(s.s.Bytes())
}

Expand Down
Loading

0 comments on commit b378b8d

Please sign in to comment.