Skip to content

Commit

Permalink
tokenizer: read_number
Browse files Browse the repository at this point in the history
  • Loading branch information
StunxFS committed Nov 19, 2024
1 parent e57c5ae commit f4a4aa0
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 11 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
fut-out/*.h
fut-out/*.c
rivetc
tokenizer.py
3 changes: 1 addition & 2 deletions src/compiler/mod.v
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@ module compiler

import compiler.context
import compiler.report

import compiler.tokenizer as _

pub fn run(args []string) {
mut c_ctx := &context.CContext{

Check warning on line 12 in src/compiler/mod.v

View workflow job for this annotation

GitHub Actions / ubuntu-gcc

unused variable: `c_ctx`
options: context.parse_args(args) or { report.error(err.msg()) }
options: context.parse_args(args) or { report.ic_fatal(err.msg()) }
}
}
17 changes: 13 additions & 4 deletions src/compiler/report/mod.v
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
module report

import term
import compiler.token

enum MsgLevel {
note
Expand Down Expand Up @@ -32,22 +33,30 @@ fn format_msg(msg string, level MsgLevel) string {
}

@[inline]
pub fn note(msg string) {
pub fn ic_note(msg string) {
eprintln(format_msg(msg, .note))
}

@[inline]
pub fn warn(msg string) {
pub fn ic_warn(msg string) {
eprintln(format_msg(msg, .warn))
}

@[noreturn]
pub fn error(msg string) {
pub fn ic_error(msg string) {
eprintln(format_msg(msg, .error))
exit(101)
}

@[noreturn]
pub fn ic_error(msg string) {
pub fn ic_fatal(msg string) {
panic(format_msg(msg, .ice))
}

// error takes a message and the source position it refers to.
// The body is currently empty, so the call has no effect.
pub fn error(msg string, pos token.Pos) {}

// warn takes a message and the source position it refers to.
// The body is currently empty, so the call has no effect.
pub fn warn(msg string, pos token.Pos) {}

// note takes a message only (no source position).
// The body is currently empty, so the call has no effect.
pub fn note(msg string) {}

// help takes a message only (no source position).
// The body is currently empty, so the call has no effect.
pub fn help(msg string) {}
182 changes: 178 additions & 4 deletions src/compiler/tokenizer/mod.v
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,12 @@ module tokenizer
import compiler.context
import compiler.token
import compiler.util
import compiler.report

const lf = 10
const cr = 13
const backslash = `\\`
const num_sep = '_'
const num_sep = `_`

fn is_new_line(ch u8) bool {
return ch in [cr, lf]
Expand All @@ -34,10 +35,10 @@ mut:
tidx int = -1
}

pub fn from_file(ctx &context.CContext, path string) &Tokenizer {
pub fn from_file(ctx &context.CContext, path string) &Tokenizer {
mut t := &Tokenizer{
ctx: ctx
text: util.read_file(path)
ctx: ctx
text: util.read_file(path)
}
t.file = path
t.tokenize_remaining_text()
Expand Down Expand Up @@ -139,6 +140,179 @@ fn (t &Tokenizer) look_ahead(pos int) u8 {
}
}

// read_ident consumes the longest run of characters accepted by
// `util.is_valid_name`, starting at the current position, and returns that
// run as a string. On return `t.pos` has been moved one step back from the
// first non-name character (or from the end of the text).
fn (mut t Tokenizer) read_ident() string {
	begin := t.pos
	for t.pos < t.text.len && util.is_valid_name(t.text[t.pos]) {
		t.pos++
	}
	name := t.text[begin..t.pos]
	// step back so the cursor rests on the last consumed character
	t.pos--
	return name
}

// NumberMode is the base in which a numeric literal is scanned.
// `read_number` picks the mode from the literal's two-character prefix.
enum NumberMode {
bin // selected for a `0b` prefix
oct // selected for a `0o` prefix
hex // selected for a `0x` prefix
dec // selected when none of the prefixes above match
}

// is_valid reports whether `c` is a digit character for the base `nm`,
// delegating to the matching `u8` digit predicate.
@[inline]
fn (nm NumberMode) is_valid(c u8) bool {
	if nm == .bin {
		return c.is_bin_digit()
	}
	if nm == .oct {
		return c.is_oct_digit()
	}
	if nm == .hex {
		return c.is_hex_digit()
	}
	// the only remaining mode is `.dec`
	return c.is_digit()
}

// str returns the English name of the base; it is what `${mode}` prints
// when a NumberMode is interpolated into a diagnostic message.
@[inline]
fn (nm NumberMode) str() string {
	return if nm == .bin {
		'binary'
	} else if nm == .oct {
		'octal'
	} else if nm == .hex {
		'hexadecimal'
	} else {
		'decimal'
	}
}

// read_number_ scans one numeric literal starting at `t.pos` and returns its
// source text.
//
// `mode` selects the base. For every mode except `.dec` the first two
// characters are skipped as the `0b` / `0o` / `0x` prefix. The separator
// `_` is accepted between digits; a leading, doubled or trailing separator is
// reported. For `.dec` the optional fractional part (`16.6`) and exponent
// (`6e6`, `6e-6`) are scanned too, while a following `..` (range) or
// `.name` (method call) is left unconsumed.
//
// Problems are reported through `report.error` / `report.help`; scanning
// carries on after each report for as long as those calls return.
//
// On return `t.pos` is one before the position where scanning stopped.
fn (mut t Tokenizer) read_number_(mode NumberMode) string {
	start := t.pos
	if mode != .dec {
		t.pos += 2 // skip '0x', '0b', '0o'
	}
	if t.pos < t.text.len && t.current_char() == num_sep {
		report.error('separator `_` is only valid between digits in a numeric literal',
			t.current_pos())
	}
	for t.pos < t.text.len {
		ch := t.current_char()
		if ch == num_sep && t.text[t.pos - 1] == num_sep {
			report.error('cannot use `_` consecutively in a numeric literal', t.current_pos())
		}
		if !mode.is_valid(ch) && ch != num_sep {
			if mode == .dec && (!ch.is_letter() || ch in [`e`, `E`]) {
				// decimal: stop at any non-letter, and at `e`/`E` (exponent marker)
				break
			} else if !ch.is_digit() && !ch.is_letter() {
				// other bases: stop at anything that is neither digit nor letter
				break
			}
			// a digit or letter that does not belong to this base, e.g. `0b12`
			report.error('${mode} number has unsuitable digit `${ch.ascii_str()}`',
				t.current_pos())
		}
		t.pos++
	}
	if t.text[t.pos - 1] == num_sep {
		t.pos--
		report.error('cannot use `_` at the end of a numeric literal', t.current_pos())
	}
	if mode != .dec && start + 2 == t.pos {
		// only the prefix was consumed: point the report at the prefix
		t.pos--
		report.error('number part of this ${mode} is not provided', t.current_pos())
		t.pos++
	}
	if mode == .dec {
		mut call_method := false // `true` for, e.g., 5.method(), 5.5.method(), 5e5.method()
		mut is_range := false // `true` for, e.g., 5..10
		// fractional part
		if t.pos < t.text.len && t.text[t.pos] == `.` {
			t.pos++
			if t.pos < t.text.len {
				// 16.6, 16.6.str()
				if t.text[t.pos].is_digit() {
					for t.pos < t.text.len {
						c := t.text[t.pos]
						if !c.is_digit() {
							if !c.is_letter() || c in [`e`, `E`] {
								// 16.6.str()
								if c == `.` && t.pos + 1 < t.text.len
									&& t.text[t.pos + 1].is_letter() {
									call_method = true
								}
								break
							} else {
								report.error('number has unsuitable digit `${c.ascii_str()}`',
									t.current_pos())
							}
						}
						// advance past the character just examined; without this
						// the loop would never leave the first fractional digit
						t.pos++
					}
				} else if t.text[t.pos] == `.` {
					// 4.. a range
					is_range = true
					t.pos--
				} else if t.text[t.pos] in [`e`, `E`] {
					// 6.e6
				} else if t.text[t.pos].is_letter() {
					// 16.str()
					call_method = true
					t.pos--
				} else {
					// 5.
					t.pos--
					report.error('float literals should have a digit after the decimal point',
						t.current_pos())
					fl := t.text[start..t.pos]
					report.help('use `${fl}.0` instead of `${fl}`')
					t.pos++
				}
			}
		}
		// exponential part
		mut has_exp := false
		if t.pos < t.text.len && t.text[t.pos] in [`e`, `E`] {
			has_exp = true
			t.pos++
			if t.pos < t.text.len && t.text[t.pos] in [`-`, `+`] {
				t.pos++
			}
			for t.pos < t.text.len {
				c := t.text[t.pos]
				if !c.is_digit() {
					if !c.is_letter() {
						// 6e6.str()
						if c == `.` && t.pos + 1 < t.text.len && t.text[t.pos + 1].is_letter() {
							call_method = true
						}
						break
					} else {
						report.error('this number has unsuitable digit `${c.ascii_str()}`',
							t.current_pos())
					}
				}
				t.pos++
			}
		}
		if t.text[t.pos - 1] in [`e`, `E`] {
			t.pos--
			report.error('exponent has no digits', t.current_pos())
			t.pos++
		} else if t.pos < t.text.len && t.text[t.pos] == `.` && !is_range && !call_method {
			t.pos--
			if has_exp {
				report.error('exponential part should be integer', t.current_pos())
			} else {
				report.error('too many decimal points in number', t.current_pos())
			}
			t.pos++
		}
	}
	lit := t.text[start..t.pos]
	t.pos-- // fix pos
	return lit
}

// read_number chooses the base from the two characters at the current
// position (`0b`, `0o`, `0x`; anything else is decimal) and hands the scan
// over to `read_number_`, returning its result.
fn (mut t Tokenizer) read_number() string {
	mode := if t.matches('0b', t.pos) {
		NumberMode.bin
	} else if t.matches('0o', t.pos) {
		NumberMode.oct
	} else if t.matches('0x', t.pos) {
		NumberMode.hex
	} else {
		NumberMode.dec
	}
	return t.read_number_(mode)
}

fn (mut t Tokenizer) next() token.Token {
for {
cidx := t.tidx
Expand Down
7 changes: 6 additions & 1 deletion src/compiler/util/mod.v
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,15 @@ module util
import os
import compiler.report

// is_valid_name reports whether `c` may appear in a name: an ASCII letter,
// an ASCII digit, or an underscore.
@[inline]
pub fn is_valid_name(c u8) bool {
	if c.is_alnum() {
		return true
	}
	return c == `_`
}

pub fn read_file(path string) string {
return skip_bom(os.read_file(path) or {
// we use `ic_error` because this should not happen
report.ic_error(err.msg())
report.ic_fatal(err.msg())
})
}

Expand Down

0 comments on commit f4a4aa0

Please sign in to comment.