-
Notifications
You must be signed in to change notification settings - Fork 2
/
tokenizer.go
143 lines (120 loc) · 2.85 KB
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
package mui
// Tokenize splits raw into tokens and returns the first token of the
// resulting chain, linked together by chainTokens. The result is nil when
// no token was produced.
//
// Recognized input:
//   - runs of ASCII letters (a-z, A-Z) become one Identifier token;
//   - space, tab and newline each become a whitespace token;
//   - ':', '(', ')' and ',' each become a punctuation token;
//   - text enclosed in single quotes becomes a String token, quotes excluded.
//
// Any other character (digits included) produces no token, although it still
// terminates a pending identifier. A string that is opened but never closed
// is dropped silently. The returned error is currently always nil.
//
// Every token records source.currentIdx as it is at the moment the token is
// pushed.
func Tokenize(raw string) (*Token, error) {
	source := createSource(raw)
	tokenList := CreateTokenList()

	var (
		openString       bool
		stringBuffer     []rune
		identifierBuffer []rune
	)

	for source.hasChar() {
		currentChar := source.getChar()

		// Inside a string literal everything is taken verbatim until the
		// closing delimiter shows up.
		if openString {
			if isStringDelimiter(currentChar) {
				openString = false
				tokenList.Push(createStringToken(stringBuffer, source.currentIdx))
				stringBuffer = nil
			} else {
				stringBuffer = append(stringBuffer, currentChar)
			}
			continue
		}

		if isLetter(currentChar) {
			identifierBuffer = append(identifierBuffer, currentChar)
			// No later non-letter will flush the buffer once the source
			// reports that there is no next character, so flush it here.
			if !source.hasNextChar() {
				tokenList.Push(createIdentifierToken(identifierBuffer, source.currentIdx))
			}
			continue
		}

		// Any non-letter ends the identifier that was being collected.
		if len(identifierBuffer) > 0 {
			tokenList.Push(createIdentifierToken(identifierBuffer, source.currentIdx))
			identifierBuffer = nil
		}

		switch {
		case isWhitespace(currentChar):
			tokenList.Push(createWhitespaceToken(currentChar, source.currentIdx))
		case isPunct(currentChar):
			tokenList.Push(createPunctToken(currentChar, source.currentIdx))
		case isStringDelimiter(currentChar):
			// openString is always false on this path, so a delimiter seen
			// here can only open a string.
			openString = true
		}
	}

	return chainTokens(tokenList), nil
}
// chainTokens links every token in tokenList.Tokens to its neighbours through
// previousToken and nextToken and returns the first token. It returns nil
// when the list holds no tokens. The first token keeps its previousToken and
// the last token keeps its nextToken untouched.
func chainTokens(tokenList *TokenList) *Token {
	tokens := tokenList.Tokens
	if len(tokens) == 0 {
		return nil
	}

	last := len(tokens) - 1
	for i, tok := range tokens {
		if i > 0 {
			tok.previousToken = tokens[i-1]
		}
		if i < last {
			tok.nextToken = tokens[i+1]
		}
	}
	return tokens[0]
}
// createWhitespaceToken builds a token whose Value is ch and whose Idx is idx.
// Space, tab and newline map to Space, Tab and NewLine respectively; for any
// other rune TokenType keeps its zero value.
func createWhitespaceToken(ch rune, idx int) *Token {
	tok := new(Token)
	tok.Value = string(ch)
	tok.Idx = idx

	switch ch {
	case '\n':
		tok.TokenType = NewLine
	case '\t':
		tok.TokenType = Tab
	case ' ':
		tok.TokenType = Space
	}

	return tok
}
// createPunctToken builds a token whose Value is ch and whose Idx is idx.
// ':', '(', ')' and ',' map to Colon, OpenParenthesis, CloseParenthesis and
// Comma respectively; for any other rune TokenType keeps its zero value.
func createPunctToken(ch rune, idx int) *Token {
	tok := new(Token)
	tok.Value = string(ch)
	tok.Idx = idx

	switch ch {
	case '(':
		tok.TokenType = OpenParenthesis
	case ')':
		tok.TokenType = CloseParenthesis
	case ',':
		tok.TokenType = Comma
	case ':':
		tok.TokenType = Colon
	}

	return tok
}
// createIdentifierToken returns an Identifier token whose Value is the text
// held in identifierBuffer and whose Idx is idx.
func createIdentifierToken(identifierBuffer []rune, idx int) *Token {
	tok := new(Token)
	tok.TokenType = Identifier
	tok.Value = string(identifierBuffer)
	tok.Idx = idx
	return tok
}
// createStringToken returns a String token whose Value is the text held in
// stringBuffer and whose Idx is idx. A nil or empty buffer yields an empty
// Value.
func createStringToken(stringBuffer []rune, idx int) *Token {
	tok := new(Token)
	tok.TokenType = String
	tok.Value = string(stringBuffer)
	tok.Idx = idx
	return tok
}
// isWhitespace reports whether ch is a space, a tab or a newline.
func isWhitespace(ch rune) bool {
	switch ch {
	case ' ', '\t', '\n':
		return true
	}
	return false
}
// isLetter reports whether ch is an ASCII letter (a-z or A-Z).
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
// isNumber reports whether ch is an ASCII decimal digit (0-9).
func isNumber(ch rune) bool {
	if ch < '0' || ch > '9' {
		return false
	}
	return true
}
// isPunct reports whether ch is one of the punctuation characters the
// tokenizer knows: ':', '(', ')' or ','.
func isPunct(ch rune) bool {
	switch ch {
	case ':', '(', ')', ',':
		return true
	}
	return false
}
// isStringDelimiter reports whether ch is the single quote that opens and
// closes string literals.
func isStringDelimiter(ch rune) bool {
	const quote = '\''
	return ch == quote
}