-
Notifications
You must be signed in to change notification settings - Fork 3
/
tokeniser.go
176 lines (149 loc) · 4.11 KB
/
tokeniser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
package ohmyglob
import (
"bytes"
"io"
)
// tc is the type of a token component produced by the tokeniser
// (tc = type component).
type tc uint8

const (
	// An unknown component; returned if there is an error scanning or there are no more tokens
	tcUnknown = tc(0x0)
	// A string literal
	tcLiteral = tc(0x1)
	// An Escaper
	tcEscaper = tc(0x2)
	// Any characters, aside from the separator
	tcStar = tc(0x3)
	// A globstar component
	tcGlobStar = tc(0x4)
	// Any single character, aside from the separator
	tcAny = tc(0x5)
	// A separator
	tcSeparator = tc(0x6)
)
// Tokenises a glob input; implements an API very similar to that of bufio.Scanner (though is not identical)
type globTokeniser struct {
	input       io.RuneScanner // source of runes being tokenised
	globOptions *Options       // options; Separator is read during parsing

	// State of the most recent Scan
	token     string // last token yielded by Scan
	tokenType tc     // type of the last token
	err       error  // error from the last Scan, if any

	// Buffered lookahead state populated by Peek and consumed by Scan
	hasPeek       bool   // whether a peeked token is buffered
	peekToken     string // buffered token text
	peekTokenType tc     // buffered token type
	peekErr       error  // error encountered while peeking
}
// newGlobTokeniser constructs a tokeniser that reads glob components from
// input, configured by globOptions.
func newGlobTokeniser(input io.RuneScanner, globOptions *Options) *globTokeniser {
	tokeniser := new(globTokeniser)
	tokeniser.input = input
	tokeniser.globOptions = globOptions
	return tokeniser
}
// parse advances by a single token, returning the token's text, its type, and
// any error encountered while reading. lastTokenType is the type of the token
// parsed immediately before this call: when it is tcEscaper, the first rune
// read here is forced to be a literal.
func (g *globTokeniser) parse(lastTokenType tc) (string, tc, error) {
	var err error
	tokenBuf := new(bytes.Buffer)
	tokenType := tcUnknown
	// A preceding escaper forces the next rune to be treated as a literal
	escaped := lastTokenType == tcEscaper

	for {
		var r rune
		r, _, err = g.input.ReadRune()
		if err != nil {
			// Read error (including io.EOF); handled after the loop
			break
		}

		// Classify the rune on its own
		runeType := tcUnknown
		switch r {
		case Escaper:
			runeType = tcEscaper
		case '*':
			if tokenType == tcStar {
				// A second consecutive '*' upgrades the in-progress star
				// token to a globstar
				runeType = tcGlobStar
				tokenType = tcGlobStar
			} else {
				runeType = tcStar
			}
		case '?':
			runeType = tcAny
		case g.globOptions.Separator:
			runeType = tcSeparator
		default:
			runeType = tcLiteral
		}

		if escaped {
			// If the last token was an Escaper, this MUST be a literal
			runeType = tcLiteral
			escaped = false
		}

		if (tokenType != tcUnknown) && (tokenType != runeType) {
			// We've stumbled into the next token; backtrack
			// NOTE(review): UnreadRune's error is ignored here — it cannot
			// fail when it directly follows a successful ReadRune, but an
			// arbitrary io.RuneScanner could misbehave; confirm acceptable.
			g.input.UnreadRune()
			break
		}

		tokenType = runeType
		tokenBuf.WriteRune(r)

		if tokenType == tcEscaper ||
			tokenType == tcGlobStar ||
			tokenType == tcAny ||
			tokenType == tcSeparator {
			// These tokens are standalone; continued consumption must be a separate token
			break
		}
	}

	if err == io.EOF && tokenType != tcUnknown {
		// If we have a token, we can't have an EOF: we want the EOF on the next pass
		err = nil
	}

	if err != nil {
		return "", tcUnknown, err
	}

	if tokenType == tcEscaper {
		// Escapers should never be yielded; recurse to find the next token
		return g.parse(tokenType)
	}

	return tokenBuf.String(), tokenType, err
}
// Scan advances the tokeniser to the next token, which will then be available through the Token method. It returns
// false when the tokenisation stops, either by reaching the end of the input or an error. After Scan returns false,
// the Err method will return any error that occurred during scanning, except that if it was io.EOF, Err will return
// nil.
func (g *globTokeniser) Scan() bool {
	switch {
	case g.hasPeek:
		// Consume the buffered lookahead rather than reading more input
		g.token = g.peekToken
		g.tokenType = g.peekTokenType
		g.err = g.peekErr
	default:
		g.token, g.tokenType, g.err = g.parse(g.tokenType)
	}

	// Reset the lookahead buffer now that it has been consumed (or was empty)
	g.hasPeek = false
	g.peekToken = ""
	g.peekTokenType = tcUnknown
	g.peekErr = nil

	return g.err == nil
}
// Peek peeks to the next token, making it available as PeekToken(). Next time Scan() is called it will advance the
// tokeniser to the peeked token. If there is already a peeked token, it will not advance.
func (g *globTokeniser) Peek() bool {
	if g.hasPeek {
		// Lookahead already buffered; do not advance
		return g.peekErr == nil
	}
	g.peekToken, g.peekTokenType, g.peekErr = g.parse(g.tokenType)
	g.hasPeek = true
	return g.peekErr == nil
}
// Err returns the first non-EOF error that was encountered by the tokeniser
func (g *globTokeniser) Err() error {
	err := g.err
	if err == io.EOF {
		// EOF is the normal end of tokenisation, not an error
		err = nil
	}
	return err
}
// Token returns the token most recently made available by Scan, along with its type.
func (g *globTokeniser) Token() (token string, tokenType tc) {
	token = g.token
	tokenType = g.tokenType
	return token, tokenType
}
// PeekToken returns the peeked token
func (g *globTokeniser) PeekToken() (token string, tokenType tc) {
	token = g.peekToken
	tokenType = g.peekTokenType
	return token, tokenType
}
// PeekErr returns the error that will be returned by Err() next time Scan() is called. Peek() must be called first.
func (g *globTokeniser) PeekErr() error {
	err := g.peekErr
	if err == io.EOF {
		// Mirror Err(): EOF is not reported as an error
		err = nil
	}
	return err
}