Commit e6a5352

Sync lexer punctuations with ZetaSQL (#182)
* Sync lexer symbols with ZetaSQL
* Revert illegal input character test case
1 parent 560e46c commit e6a5352

File tree: 3 files changed, +75 -24 lines


lexer.go (+41, -1)

```diff
@@ -109,7 +109,9 @@ func (l *Lexer) consumeToken() {
     }
 
     switch l.peek(0) {
-    case '(', ')', '{', '}', ';', ',', '[', ']', '~', '*', '/', '&', '^', '+', '-':
+    case '(', ')', '{', '}', ';', ',', '[', ']', '~', '*', '/', '&', '^', '%', ':',
+        // Belows are not yet used in Spanner.
+        '?', '\\', '$':
         l.Token.Kind = token.TokenKind([]byte{l.skip()})
         return
     case '.':
@@ -151,6 +153,32 @@ func (l *Lexer) consumeToken() {
             l.Token.Kind = ">"
         }
         return
+    case '+':
+        switch {
+        // KW_ADD_ASSIGN in ZetaSQL
+        case l.peekIs(1, '='):
+            l.skipN(2)
+            l.Token.Kind = "+="
+        default:
+            l.skip()
+            l.Token.Kind = "+"
+        }
+        return
+    case '-':
+        switch {
+        // KW_SUB_ASSIGN in ZetaSQL
+        case l.peekIs(1, '='):
+            l.skipN(2)
+            l.Token.Kind = "-="
+        // KW_LAMBDA_ARROW in ZetaSQL
+        case l.peekIs(1, '>'):
+            l.skipN(2)
+            l.Token.Kind = "->"
+        default:
+            l.skip()
+            l.Token.Kind = "-"
+        }
+        return
     case '=':
         switch {
         case l.peekIs(1, '>'):
@@ -163,6 +191,9 @@ func (l *Lexer) consumeToken() {
         return
     case '|':
         switch {
+        case l.peekIs(1, '>'):
+            l.skipN(2)
+            l.Token.Kind = "|>"
         case l.peekIs(1, '|'):
             l.skipN(2)
             l.Token.Kind = "||"
@@ -177,7 +208,16 @@ func (l *Lexer) consumeToken() {
             l.Token.Kind = "!="
             return
         }
+        l.skip()
+        l.Token.Kind = "!"
+        return
     case '@':
+        // KW_DOUBLE_AT is not yet used in Cloud Spanner, but used in BigQuery.
+        if l.peekIs(1, '@') {
+            l.skipN(2)
+            l.Token.Kind = "@@"
+            return
+        }
         if l.peekOk(1) && char.IsIdentStart(l.peek(1)) {
             i := 1
             for l.peekOk(i) && char.IsIdentPart(l.peek(i)) {
```
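All of the new branches use the same one-byte lookahead: peek at the next character and emit the two-character token when it completes one, otherwise fall back to the single character. Below is a minimal standalone sketch of that rule for `-` (a hypothetical helper for illustration only; the real lexer works on its internal buffer via `peekIs`/`skipN`):

```go
package main

import "fmt"

// scanMinus sketches the lookahead used by the new `case '-'` branch:
// prefer the two-character token when the next byte completes one.
func scanMinus(src string, i int) (kind string, next int) {
    if i+1 < len(src) {
        switch src[i+1] {
        case '=': // KW_SUB_ASSIGN in ZetaSQL
            return "-=", i + 2
        case '>': // KW_LAMBDA_ARROW in ZetaSQL
            return "->", i + 2
        }
    }
    // Plain minus when no longer token applies.
    return "-", i + 1
}

func main() {
    for _, src := range []string{"-= 1", "-> x", "- 1"} {
        kind, _ := scanMinus(src, 0)
        fmt.Printf("%-6q starts with token %q\n", src, kind)
    }
}
```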

lexer_test.go (+34, -21)

```diff
@@ -11,35 +11,48 @@ import (
     . "github.com/cloudspannerecosystem/memefish/token"
 )
 
+// Keep same order https://github.com/google/zetasql/blob/master/zetasql/parser/flex_tokenizer.l
 var symbols = []string{
-    ".",
-    ",",
-    ";",
     "(",
-    ")",
-    "{",
-    "}",
     "[",
+    "{",
+    ")",
     "]",
-    "@",
-    "~",
-    "+",
-    "-",
+    "}",
     "*",
-    "/",
-    "&",
-    "^",
-    "|",
-    "||",
+    ",",
     "=",
-    "<",
-    "<<",
+    "+=",
+    "-=",
+    "!=",
     "<=",
-    "<>",
+    "<<",
+    "=>",
+    "->",
+    "<",
     ">",
-    ">>",
     ">=",
-    "!=",
+    "||",
+    "|",
+    "^",
+    "&",
+    "+",
+    "-",
+    "/",
+    "~",
+    "?",
+    "!",
+    "%",
+    "|>",
+    "@",
+    "@@",
+    ".",
+    ":",
+    "\\",
+    ";",
+    "$",
+    "<>", // <> is not a valid token in ZetaSQL, but it is a token in memefish
+    ">>", // >> is not a valid token in ZetaSQL, but it is a token in memefish.
 }
 
 var lexerTestCases = []struct {
@@ -132,7 +145,7 @@ var lexerWrongTestCase = []struct {
     pos     Pos
     message string
 }{
-    {"?", 0, "illegal input character: '?'"},
+    {"\b", 0, "illegal input character: '\\b'"},
     {`"foo`, 0, "unclosed string literal"},
     {`R"foo`, 1, "unclosed raw string literal"},
     {"'foo\n", 0, "unclosed string literal: newline appears in non triple-quoted"},
```
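The ordering of the list only mirrors ZetaSQL's flex_tokenizer.l for easier comparison; overlaps such as `+` vs `+=` or `|` vs `||` vs `|>` are resolved by the lexer's longest-match lookahead, not by list order. A self-contained sketch of that maximal-munch idea over an abridged copy of the list (illustrative only, not the memefish test code):

```go
package main

import (
    "fmt"
    "sort"
    "strings"
)

// An abridged copy of the overlapping symbols from lexer_test.go.
var symbols = []string{"+", "+=", "-", "-=", "->", "|", "||", "|>", "@", "@@", "!", "!=", "<", "<<", "<=", "<>"}

// longestMatch returns the longest symbol that prefixes src,
// mirroring the lookahead the lexer's case branches implement.
func longestMatch(src string) string {
    sorted := append([]string(nil), symbols...)
    // Try longer symbols first so "+=" wins over "+".
    sort.Slice(sorted, func(i, j int) bool { return len(sorted[i]) > len(sorted[j]) })
    for _, s := range sorted {
        if strings.HasPrefix(src, s) {
            return s
        }
    }
    return ""
}

func main() {
    for _, src := range []string{"+= 1", "|> SELECT", "@@version", "<> b", "- x"} {
        fmt.Printf("%-12q lexes %q first\n", src, longestMatch(src))
    }
}
```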

split_test.go (-2)

```diff
@@ -69,8 +69,6 @@ func TestSplitRawStatements(t *testing.T) {
             want: []*memefish.RawStatement{
                 {Statement: "SELECT `1;2;3`", End: token.Pos(14)},
             }},
-        // $` may become a valid token in the future, but it's reasonable to check its current behavior.
-        {desc: "unknown token", input: "SELECT $;", errRe: regexp.MustCompile(`illegal input character: '\$'`)},
     } {
         t.Run(test.desc, func(t *testing.T) {
             stmts, err := memefish.SplitRawStatements("", test.input)
```
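The dropped case asserted that `$` is rejected by the lexer; with `$` now accepted as a token (see lexer.go above), that expectation presumably no longer holds, so it was removed rather than updated. For context, here is a minimal usage sketch of `SplitRawStatements`, with the call shape taken from the test above (the printed format is illustrative):

```go
package main

import (
    "fmt"
    "log"

    "github.com/cloudspannerecosystem/memefish"
)

func main() {
    // The first argument is presumably a file name used for error positions;
    // the test above passes an empty string, so we do the same here.
    stmts, err := memefish.SplitRawStatements("", "SELECT 1; SELECT 2")
    if err != nil {
        log.Fatal(err)
    }
    for _, stmt := range stmts {
        fmt.Printf("%q ends at %v\n", stmt.Statement, stmt.End)
    }
}
```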
