Commit

WIP

elliotchance committed Dec 24, 2024
1 parent 336a4e0 commit ece6330
Showing 2 changed files with 137 additions and 14 deletions.
22 changes: 8 additions & 14 deletions grammar.y
@@ -73,10 +73,10 @@ module vsql
%token OPERATOR_ASTERISK OPERATOR_PLUS OPERATOR_COMMA OPERATOR_MINUS;
%token OPERATOR_PERIOD OPERATOR_SOLIDUS OPERATOR_COLON OPERATOR_LESS_THAN;
%token OPERATOR_GREATER_THAN OPERATOR_DOUBLE_PIPE OPERATOR_NOT_EQUALS;
%token OPERATOR_GREATER_EQUALS OPERATOR_LESS_EQUALS;
%token OPERATOR_GREATER_EQUALS OPERATOR_LESS_EQUALS OPERATOR_SEMICOLON;

// literals
%token LITERAL_IDENTIFIER LITERAL_STRING LITERAL_INTEGER;
%token LITERAL_IDENTIFIER LITERAL_STRING LITERAL_NUMBER;

%start preparable_statement;

@@ -345,7 +345,7 @@ signed_integer /* Value */ :
| sign unsigned_integer { log("signed_integer_2()") }

unsigned_integer /* string */ :
LITERAL_INTEGER
LITERAL_NUMBER

datetime_literal /* Value */ :
date_literal
@@ -1641,7 +1641,7 @@ type YYSym = Value | ValueSpecification | ValueExpression | RowValueConstructor
| CommonValueExpression | BooleanTerm | ValueExpressionPrimary
| NumericPrimary | Term | BooleanTest | BooleanPrimary | BooleanPredicand
| NonparenthesizedValueExpressionPrimary | SimpleTable | QueryExpression
| Stmt;
| Stmt | string;

pub struct YYSymType {
pub mut:
@@ -1653,12 +1653,6 @@ fn log(s string) {
println(s)
}

pub struct Tok {
pub:
token int
sym YYSymType
}

pub struct Lexer {
pub mut:
tokens []Tok
@@ -1680,11 +1674,11 @@
}

pub fn main_() {
// println(tokenize2("SELECT 'foo' FROM bar WHERE \"baz\" = 12.3"))
tokens := tokenize2("VALUES FALSE")

mut lexer := Lexer{
tokens: [
Tok{token_values, YYSymType{} }
Tok{token_true, YYSymType{} }
]
tokens: tokens
}
mut parser := yy_new_parser()
parser.parse(mut lexer)
129 changes: 129 additions & 0 deletions vsql/lexer.v
@@ -160,3 +160,132 @@ fn is_identifier_char(c rune, is_not_first bool) bool {

return yes
}

pub struct Tok {
pub:
token int
sym YYSymType
}

fn tokenize2(sql_stmt string) []Tok {
mut tokens := []Tok{}
cs := sql_stmt.trim(';').runes()
mut i := 0

next: for i < cs.len {
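// The `next` label on this loop lets the operator branches below use
// `continue next` to move on to the following character once a token
// has been matched and consumed.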
// Numbers
if cs[i] >= `0` && cs[i] <= `9` {
mut word := ''
for i < cs.len && cs[i] >= `0` && cs[i] <= `9` {
word += '${cs[i]}'
i++
}
tokens << Tok{token_literal_number, YYSymType{value: word}}

// There is a special case for approximate numbers where 'E' is considered
// a separate token in the SQL BNF. However, a bare "e2" must not be split
// into two tokens, so the 'E' is only recognized here, immediately after a
// number token.
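// For example, assuming the generated token constants, "1E2" lexes as
// token_literal_number "1", token_e, token_literal_number "2", while a
// bare "e2" falls through to the identifier handling below as one token.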
if i < cs.len && (cs[i] == `e` || cs[i] == `E`) {
tokens << Tok{token_e, YYSymType{}}
i++
}

continue
}

// Strings
if cs[i] == `'` {
mut word := ''
i++
for i < cs.len && cs[i] != `'` {
word += '${cs[i]}'
i++
}
i++
tokens << Tok{token_literal_string, YYSymType{value: word}}
continue
}

// Delimited identifiers
if cs[i] == `"` {
mut word := ''
i++
for i < cs.len && cs[i] != `"` {
word += '${cs[i]}'
i++
}
i++
tokens << Tok{token_literal_identifier, YYSymType{value: '"${word}"'}}
continue
}

// Operators
multi := {
'<>': token_operator_not_equals
'>=': token_operator_greater_equals
'<=': token_operator_less_equals
'||': token_operator_double_pipe
}
for op, tk in multi {
if i + 1 < cs.len && cs[i] == op[0] && cs[i + 1] == op[1] {
tokens << Tok{tk, YYSymType{value: op}}
i += 2
continue next
}
}

single := {
`(`: token_operator_left_paren
`)`: token_operator_right_paren
`*`: token_operator_asterisk
`+`: token_operator_plus
`,`: token_operator_comma
`-`: token_operator_minus
`/`: token_operator_solidus
`;`: token_operator_semicolon
`<`: token_operator_less_than
`=`: token_operator_equals
`>`: token_operator_greater_than
`.`: token_operator_period
`:`: token_operator_colon
}
for op, tk in single {
if cs[i] == op {
tokens << Tok{tk, YYSymType{value: op.str()}}
i++
continue next
}
}

// Keyword or regular identifier
mut word := ''
mut is_not_first := false
for i < cs.len && is_identifier_char(cs[i], is_not_first) {
word += '${cs[i]}'
i++
is_not_first = true
}

if word == '' {
i++
continue
}

upper_word := word.to_upper()
mut found := false
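// 57343 is assumed to be the base that maps an entry's index in yy_toknames
// to the numeric token constant produced by the parser generator.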
for tok_pos, tok_name in yy_toknames {
if tok_name == upper_word {
tokens << Tok{tok_pos + 57343, YYSymType{value: upper_word}}
found = true
break
}
}

if !found {
tokens << Tok{token_literal_identifier, YYSymType{value: word}}
}
}

return tokens
}
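
A minimal usage sketch, mirroring the main_ example in grammar.y above (the wrapper function name is illustrative; Lexer and yy_new_parser are assumed to come from the generated parser code):

fn run_tokenizer_example() {
	// Tokenize a statement and feed the token stream to the generated parser.
	mut lexer := Lexer{
		tokens: tokenize2("SELECT 'foo' FROM bar WHERE \"baz\" = 123")
	}
	mut parser := yy_new_parser()
	parser.parse(mut lexer)
}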
