feat: remove tokenizer
BREAKING CHANGE: the built-in tokenizer was removed; a tokenizer is now expected to be passed to the parser during initialization. It is expected to be an iterable emitting token records `{type: ..., value: ..., loc: ...}`, with the last token being `{type: "end", loc: ...}`.
kollhof committed Nov 27, 2020
1 parent 36c8a75 commit 0dfb653
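
For orientation, here is a minimal TypeScript sketch of a token iterable matching the contract described in the commit message. Only the record shape `{type, value, loc}` and the trailing `{type: "end", loc: ...}` token come from the message (the `pos`/`line`/`column` loc fields mirror the `start_token` shape in src/parser.fnk below); the `tokensFor` helper, the specific token types, and the loc values are illustrative.

```typescript
// Shape of the token records the parser now expects (per the commit message).
interface Pos { pos: number; line: number; column: number; }
interface Loc { start: Pos; end: Pos; }

interface Token {
  type: string;    // token kind; the final token must have type 'end'
  value?: string;  // source text of the token (the 'end' token carries none)
  loc: Loc;
}

// Hypothetical helper producing such an iterable for a tiny one-line input.
function* tokensFor(): Generator<Token> {
  const at = (pos: number, len: number): Loc => ({
    start: { pos, line: 1, column: pos },
    end: { pos: pos + len, line: 1, column: pos + len },
  });
  yield { type: 'number', value: '1', loc: at(0, 1) };
  yield { type: 'operator', value: '+', loc: at(2, 1) };
  yield { type: 'number', value: '2', loc: at(4, 1) };
  // The stream must terminate with an explicit end token.
  yield { type: 'end', loc: at(5, 0) };
}
```

The new `init_parser` in src/parser.fnk (below) wraps whatever is passed as `tokens` with `iter`, so any iterable of this shape — an array, a generator, or a lazy tokenizer — should work.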
Showing 8 changed files with 523 additions and 1,127 deletions.
459 changes: 245 additions & 214 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion src/errors.fnk
@@ -4,7 +4,7 @@


get_error = fn ctx, msg, token, stack_func:
{tokenizer: {code, filename}} = ctx
{code, filename} = ctx
{start: {line, column}} = token.loc

# TODO: use error from std-lib?
112 changes: 112 additions & 0 deletions src/expressions.fnk
@@ -0,0 +1,112 @@
{add_error} = import './errors.fnk'


auto = {auto: true}



error_nud = fn ctx:
{curr_token} = ctx
{value} = curr_token

add_error ctx,
'Unexpected token `${value}` at start of expression.'
curr_token



error_led = fn ctx:
{curr_token} = ctx
{value} = curr_token

add_error ctx,
'Cannot use `${value}` as an infix operator.'
curr_token



left_binding = fn token_type: dict:
token_type

nud: fn: error_nud
lbp: fn lbp: fn: lbp
led: fn: error_led



non_binding = fn token_type: dict:
token_type

nud: fn: error_nud
lbp: fn: fn: 0
led: fn: error_led



add_expr = fn expr_builder, lbp_value: fn {expr_builders, ...ctx}:
{next_lbp, lbps, nuds, leds} = expr_builders

final_lbp_value = match lbp_value:
auto: next_lbp
else: lbp_value

{token_type, nud, lbp, led} = expr_builder

dict:
...ctx
expr_builders: dict:
next_lbp: next_lbp + 2
nuds: {...nuds, (token_type): nud final_lbp_value}
lbps: {...lbps, (token_type): lbp final_lbp_value}
leds: {...leds, (token_type): led final_lbp_value}



add_ignorable = fn token_type: fn {igns, ...ctx}:
dict:
...ctx
igns: {...igns, (token_type): true}


add_separator = fn expr_builder: add_expr expr_builder, 0


add_operator = fn expr_builder: add_expr expr_builder, auto
add_identifier = fn expr_builder: add_expr expr_builder, auto
add_literal = fn expr_builder: add_expr expr_builder, auto



is_ignorable = fn ctx, token:
ctx.igns.(token.type) == true



nud = fn ctx:
{curr_token: {type}, expr_builders: {nuds}} = ctx
{(type): nud_fn} = nuds
nud_fn ctx



led = fn ctx, left:
{curr_token: {type}, expr_builders: {leds}} = ctx
{(type): led_fn} = leds
led_fn ctx, left



next_lbp = fn ctx, left:
{next_token: {type}, expr_builders: {lbps}} = ctx
{(type): lbp_fn} = lbps
lbp_fn ctx, left




init_expr_builders = fn: dict:
next_lbp: 2
nuds: {}
lbps: {end: fn: 0}
leds: {}
igns: {}
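
The nud/led/lbp triples registered in this new file are the standard ingredients of a Pratt (top-down operator-precedence) parser, which the `expression` loop in src/parser.fnk below drives. As a rough orientation only, here is a minimal TypeScript sketch of that idea; the real registry stores curried fink functions and threads an immutable ctx record, which this sketch flattens into plain numbers and a small class, and all names here are illustrative.

```typescript
// Minimal Pratt-parser sketch, for orientation only; not the fink implementation.
type Token = { type: string; value?: string };
type Ast = unknown;

interface Builders {
  nuds: Record<string, (p: Pratt, t: Token) => Ast>;            // start-of-expression handlers
  leds: Record<string, (p: Pratt, t: Token, left: Ast) => Ast>; // infix/postfix handlers
  lbps: Record<string, number>;                                  // left binding powers
}

class Pratt {
  private pos = 0;
  constructor(private tokens: Token[], private b: Builders) {}

  private peek(): Token { return this.tokens[this.pos]; }
  private advance(): Token { return this.tokens[this.pos++]; }

  // Parse an expression made of operators that bind more tightly than rbp.
  expression(rbp: number): Ast {
    let tok = this.advance();
    let left = this.b.nuds[tok.type](this, tok);
    while (rbp < (this.b.lbps[this.peek().type] ?? 0)) {
      tok = this.advance();
      left = this.b.leds[tok.type](this, tok, left);
    }
    return left;
  }
}

// Example registry: number literals plus a left-associative infix '+'.
const builders: Builders = {
  nuds: { number: (_p, t) => Number(t.value) },
  leds: { '+': (p, _t, left) => ({ op: '+', left, right: p.expression(10) }) },
  lbps: { '+': 10, end: 0 },
};

const tokens: Token[] = [
  { type: 'number', value: '1' },
  { type: '+' },
  { type: 'number', value: '2' },
  { type: 'end' },
];

// Parses `1 + 2` into { op: '+', left: 1, right: 2 }.
console.log(new Pratt(tokens, builders).expression(0));
```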
128 changes: 43 additions & 85 deletions src/parser.fnk
@@ -1,19 +1,13 @@
{matches} = import '@fink/std-lib/regex.fnk'
{is_empty} = import '@fink/std-lib/iter.fnk'
{next, iter} = import '@fink/std-lib/iter.fnk'

{init_tokenizer, get_next_token, get_text} = import './tokenizer.fnk'
{init_symbols, ignorable, next_lbp, led, nud} = import './symbols.fnk'
{init_expr_builders, is_ignorable, next_lbp, led, nud} = import './expressions.fnk'
{add_error} = import './errors.fnk'



# TODO: should it live here
ingnorable_token = {ignorable: true}



start_token = dict:
start_token: true
start: true
loc: dict:
start: {pos: 0, line: 1, column: 0}
end: {pos: 0, line: 1, column: 0}
@@ -37,11 +31,6 @@ curr_loc = fn {curr_token}:



curr_is = fn ctx, expected:
expected == curr_value ctx



next_value = fn {next_token}:
next_token.value

@@ -57,89 +46,54 @@ next_is = fn ctx, expected:



next_is_any = fn ctx, ...expected:
(next_value ctx) in expected



next_matches = fn ctx, regex:
{next_token} = ctx
match next_token:
{end: true}:
false
else:
matches next_token.value, regex



next_is_end = fn ctx, expected_end:
next_is_end = fn ctx:
match ctx:
{next_token: {end: true}}: true
next_is ?, expected_end: true
else: false
{next_token: {type: 'end'}}:
true
# {tokens: {done: true}}:
# console.log '>>>>>'
# true
else:
false



advance = fn ctx:
curr_token = ctx.next_token

[[next_token, tokenizer]] = pipe [, ctx.tokenizer]:
unfold [, tokenizer]:
# TODO: should it return [tokenizer_ctx, next_token]?
[next_token, next_tokenizer] = get_next_token tokenizer

token = match next_token:
ignorable ctx, ?: ingnorable_token
else: next_token

[token, next_tokenizer]
{next_token: curr_token, tokens, ignored_tokens} = ctx

filter [token]:
token != ingnorable_token
match curr_token:
{type: 'end'}:
{...ctx, curr_token}

{...ctx, tokenizer, curr_token, next_token}



advance_expected = fn ctx, ...expected:
match ctx:
next_is_any ?, ...expected:
advance ctx
else:
{value} = ctx.next_token
[, next_ctx] = add_error ctx,
'Expected one of `${expected}` but found `${value}`.'
ctx.next_token
next_ctx
[next_token, next_tokens] = next tokens

match next_token:
is_ignorable ctx, ?:
advance rec:
...ctx
tokens: next_tokens
ignored_tokens: [...ignored_tokens, next_token]

else:
next_ctx = rec:
...ctx
tokens: next_tokens
curr_token
next_token
ignored_tokens

collect_text = fn ctx, stop_at:
{curr_token} = ctx

[text, next_tokenizer] = get_text ctx.tokenizer, curr_token.loc.end, stop_at
[next_token, tokenizer] = get_next_token next_tokenizer

next_ctx = {...ctx, tokenizer, curr_token: text, next_token}

match next_ctx:
next_is_end ?:
[, end_ctx] = add_error next_ctx,
'Unexpected end of code.'
next_ctx.next_token
# TODO: advance end_ctx
[text, end_ctx]
else:
[text, advance next_ctx]
next_ctx



expression = fn ctx, rbp:
match ctx:
next_is_end ?:
add_error ctx, 'Unexpected end of code.', ctx.next_token
add_error ctx, 'Unexpected end of code.', ctx.curr_token

else:

[[left, next_ctx]] = pipe ctx:
advance
nud
@@ -163,16 +117,20 @@



init_parser = fn {code, filename}:
tokenizer = init_tokenizer code, filename
symbols = init_symbols _
init_parser = fn {code, filename, tokens}:
expr_builders = init_expr_builders _
start_loc = {pos: 0, line: 1, column: 0}

dict:
curr_token: start_token
next_token: start_token
code
filename
tokens: iter tokens
curr_token: {loc: {start: start_loc, end: start_loc}}
next_token: {loc: {start: start_loc, end: start_loc}}
ignored_tokens: []
expr_builders
errors: []
tokenizer
symbols



