Skip to content

Commit

Permalink
tokenizer: next
Browse files Browse the repository at this point in the history
  • Loading branch information
StunxFS committed Nov 19, 2024
1 parent 6f31668 commit 1d38e00
Show file tree
Hide file tree
Showing 2 changed files with 73 additions and 11 deletions.
9 changes: 9 additions & 0 deletions src/compiler/token/mod.v
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,15 @@ fn build_token_str() []string {
return s
}

// lookup classifies `key`: when the `keywords` table has an entry for it,
// that entry's kind is returned; any other string is reported as `.ident`.
@[inline]
pub fn lookup(key string) Kind {
	if found := keywords[key] {
		return found
	}
	return .ident
}

@[inline]
pub fn is_key(key string) bool {
return int(keywords[key]) > 0
Expand Down
75 changes: 64 additions & 11 deletions src/compiler/tokenizer/mod.v
Original file line number Diff line number Diff line change
Expand Up @@ -124,10 +124,7 @@ fn (t &Tokenizer) matches(want string, start_pos int) bool {
// peek_token returns the token located `n` entries past the current token
// index (`t.tidx`) in `t.all_tokens`, without modifying the tokenizer.
//
// When the requested index is at or beyond the end of `t.all_tokens`, an
// end-of-file token produced by `token_eof` is returned instead, so the
// EOF token is built in exactly one place.
fn (t &Tokenizer) peek_token(n int) token.Token {
	idx := t.tidx + n
	if idx >= t.all_tokens.len {
		return t.token_eof()
	}
	return t.all_tokens[idx]
}
Expand Down Expand Up @@ -182,7 +179,7 @@ fn (nm NumberMode) str() string {
}
}

fn (mut t Tokenizer) read_number_(mode NumberMode) string {
fn (mut t Tokenizer) read_number_mode(mode NumberMode) string {
start := t.pos
if mode != .dec {
t.pos += 2 // skip '0x', '0b', '0o'
Expand Down Expand Up @@ -305,7 +302,7 @@ fn (mut t Tokenizer) read_number_(mode NumberMode) string {
}

fn (mut t Tokenizer) read_number() string {
return t.read_number_(match true {
return t.read_number_mode(match true {
t.matches('0b', t.pos) { .bin }
t.matches('0o', t.pos) { .oct }
t.matches('0x', t.pos) { .hex }
Expand Down Expand Up @@ -394,19 +391,75 @@ fn (mut t Tokenizer) next() token.Token {
cidx := t.tidx
t.tidx++
if cidx >= t.all_tokens.len {
return token.Token{
kind: .eof
pos: t.current_pos()
}
return t.token_eof()
}
return t.all_tokens[cidx]
}
return t.token_eof()
}

// token_eof builds an end-of-file token (`kind: .eof`) whose position is
// whatever `current_pos` reports at the moment of the call.
@[inline]
fn (t &Tokenizer) token_eof() token.Token {
	eof_pos := t.current_pos()
	return token.Token{
		kind: .eof
		pos:  eof_pos
	}
}

// internal_next scans `t.text` from the current position and returns the
// next token.
//
// Recognized so far:
//   - names (per `util.is_valid_name`), classified through `token.lookup`
//     as either a keyword kind or `.ident`
//   - numbers, with `_` separators stripped from the literal
//   - character literals (`'`) and string literals (`"`)
//
// An end-of-file token is returned when the input is exhausted. Any other
// character is reported through `report.error`, after which an end-of-file
// token is returned as well.
fn (mut t Tokenizer) internal_next() token.Token {
	for {
		// The very first call must inspect the character already under the
		// cursor; every later call first steps past the previous position.
		// NOTE(review): this assumes the `read_*` helpers leave `t.pos` on the
		// last character they consumed - confirm against their definitions.
		if t.is_started {
			t.pos++
		} else {
			t.is_started = true
		}
		t.skip_whitespace()
		if t.pos >= t.text.len {
			return t.token_eof()
		}
		// Capture the start position before any helper advances `t.pos`.
		pos := t.current_pos()
		ch := t.current_char()
		nextc := t.look_ahead(1)
		if util.is_valid_name(ch) {
			lit := t.read_ident()
			return token.Token{
				lit:  lit
				kind: token.lookup(lit)
				pos:  pos
			}
		} else if ch.is_digit() {
			// decimals with 0 prefix = error; the literal is still read below
			// so a number token is produced regardless.
			if ch == `0` && nextc.is_digit() {
				report.error('leading zeros in decimal integer literals are not permitted',
					pos)
				report.help('use an `0o` prefix for octal integers')
			}
			return token.Token{
				lit:  t.read_number().replace('_', '')
				kind: .number
				pos:  pos
			}
		}
		match ch {
			`'` {
				return token.Token{
					lit:  t.read_char()
					kind: .char
					pos:  pos
				}
			}
			`"` {
				return token.Token{
					lit:  t.read_string()
					kind: .string
					pos:  pos
				}
			}
			else {
				report.error('invalid character `${ch}`', pos)
				// Leave the loop; the fall-through below yields an EOF token.
				break
			}
		}
	}
	return t.token_eof()
}

0 comments on commit 1d38e00

Please sign in to comment.