From 4eac42316d0100054d86e2b2e36fc6c1ffcaa424 Mon Sep 17 00:00:00 2001 From: InioX Date: Wed, 25 Dec 2024 21:02:57 +0100 Subject: [PATCH] feat: add custom syntax support --- matugen-parser/src/errors/parse.rs | 4 +- matugen-parser/src/lexer.rs | 12 +-- matugen-parser/src/main.rs | 6 +- matugen-parser/src/parser/keywords.rs | 103 ++++++++++++++------------ matugen-parser/src/parser/language.rs | 23 ++++++ matugen-parser/src/parser/mod.rs | 85 +++++++++------------ matugen-parser/src/parser/settings.rs | 61 +++++++++++++++ 7 files changed, 188 insertions(+), 106 deletions(-) create mode 100644 matugen-parser/src/parser/settings.rs diff --git a/matugen-parser/src/errors/parse.rs b/matugen-parser/src/errors/parse.rs index 87ec165..20a67d8 100644 --- a/matugen-parser/src/errors/parse.rs +++ b/matugen-parser/src/errors/parse.rs @@ -34,10 +34,10 @@ impl ParseError<'_> { ParseError { err_type, start: parser.last_bracket_start, - end: parser.prev_token_end, + end: parser.lexer_state.prev_token_end, source: parser.source, filename: &parser.filename, - line_number: parser.lexer.cur_line, + line_number: parser.lexer_state.lexer.cur_line, } } } diff --git a/matugen-parser/src/lexer.rs b/matugen-parser/src/lexer.rs index 8e1ee9a..ee6d6fd 100644 --- a/matugen-parser/src/lexer.rs +++ b/matugen-parser/src/lexer.rs @@ -6,19 +6,21 @@ pub enum Kind { String, Number, Float, - Lbracket, - RBracket, - Dot, - Bar, Space, Colon, NewLine, Eof, Sof, Identifier, + + // SPECIAL TOKENS + LBracket, + RBracket, LessThan, GreaterThan, Asterisk, + Bar, + Dot, } use std::str::FromStr; @@ -119,7 +121,7 @@ impl<'a> Lexer<'a> { } match next_char.unwrap() { - '{' => (Kind::Lbracket, TokenValue::None), + '{' => (Kind::LBracket, TokenValue::None), '}' => (Kind::RBracket, TokenValue::None), ':' => (Kind::Colon, TokenValue::None), '<' => (Kind::LessThan, TokenValue::None), diff --git a/matugen-parser/src/main.rs b/matugen-parser/src/main.rs index dced9b8..fb57401 100644 --- a/matugen-parser/src/main.rs +++ b/matugen-parser/src/main.rs @@ -2,7 +2,7 @@ use core::panic; use std::path::PathBuf; use lexer::Lexer; -use parser::Parser; +use parser::{settings::SyntaxSettings, Parser}; mod errors; mod lexer; @@ -44,7 +44,9 @@ fn main() { // let file_path = "./matugen-parser/example/template.txt"; let src = std::fs::read_to_string(&file_path).unwrap(); - let mut parser = Parser::new(&src, file_path_absolute); + let syntax = SyntaxSettings::new(['{', '{'], ['}', '}'], ['<', '*'], ['>', '*']); + + let mut parser = Parser::new(&src, file_path_absolute, &syntax); // parser.get_keywords(); diff --git a/matugen-parser/src/parser/keywords.rs b/matugen-parser/src/parser/keywords.rs index 46ceb3f..ba56e02 100644 --- a/matugen-parser/src/parser/keywords.rs +++ b/matugen-parser/src/parser/keywords.rs @@ -13,27 +13,28 @@ use super::Parser; impl Parser<'_> { fn get_opening(&mut self) -> Option { - let mut start = self.cur_token().start; + let mut start = self.lexer_state.cur_token().start; - self.bump_any(); + self.lexer_state.bump_any(); while !self.opened { - if self.eat(Kind::Lbracket) { + if self.lexer_state.eat(&self.syntax.keyword_opening[0]) { self.opened = true; self.closed = false; - } else if self.eat(Kind::Eof) { + } else if self.lexer_state.eat(&Kind::Eof) { return None; } - self.bump_while_not(Kind::Lbracket); - start = self.cur_token().start; + self.lexer_state + .bump_while_not(&self.syntax.keyword_opening[1]); + start = self.lexer_state.cur_token().start; } Some(start) } pub fn get_closing(&mut self) -> Result<(), ParseError> { println!("STARTING TO CLOSE"); - self.bump_any(); - if self.eat(Kind::RBracket) { + self.lexer_state.bump_any(); + if self.lexer_state.eat(&self.syntax.keyword_closing[0]) { self.closed = true; self.opened = false; Ok(()) @@ -48,15 +49,23 @@ impl Parser<'_> { pub fn get_keywords(&mut self) -> Vec { let mut vec: Vec = vec![]; - while !self.at(Kind::Eof) { - if !self.at(Kind::Lbracket) { - self.bump_until_not_at(Kind::Lbracket); + while !self.lexer_state.at(&Kind::Eof) { + if !self.lexer_state.at(&self.syntax.keyword_opening[0]) { + self.lexer_state + .bump_until_not_at(&self.syntax.keyword_opening[0]); } // We would only get the second bracket at the start without the -1, // the opening will ALWAYS have two brackets unlike the closing, which // might have an error inside of it (so we dont look ahead for the closing). - self.last_bracket_start = self.get_opening().unwrap() - 1; + + let opening = self.get_opening(); + + if opening.is_none() { + return vec; + } + + self.last_bracket_start = opening.unwrap() - 1; let start = self.start_node(); let mut strings: Vec = vec![]; @@ -82,15 +91,15 @@ impl Parser<'_> { fn get_filter(&mut self) -> Result, ParseError> { let start = self.start_node(); - self.bump_while_not(Kind::String); + self.lexer_state.bump_while_not(&Kind::String); - let name = self.cur_token().clone().value; + let name = self.lexer_state.cur_token().clone().value; let mut filter_args: Vec = vec![]; handle_error(self.collect_filter_args(&mut filter_args)); - if self.at(Kind::RBracket) { + if self.lexer_state.at(&self.syntax.keyword_closing[0]) { handle_error(self.get_closing()); return Ok(Some(FilterDefinition { node: self.finish_node(start), @@ -104,7 +113,7 @@ impl Parser<'_> { filter_name: name, arguments: filter_args, })) - // self.bump_while_not(Kind::RBracket); + // self.lexer_state.bump_while_not(&Kind::RBracket); } fn collect_filter_args( @@ -112,38 +121,39 @@ impl Parser<'_> { arguments: &mut Vec, ) -> Result, ParseError> { // THIS SHOULD BE THE FILTER NAME - self.eat(Kind::String); + self.lexer_state.eat(&Kind::String); - if !self.eat_ignore_spaces(Kind::Colon) { + if !self.lexer_state.eat_ignore_spaces(&Kind::Colon) { println!( "{}", - format!("DOESNT HAVE ANY ARGS: {:?}", self.cur_token()) + format!("DOESNT HAVE ANY ARGS: {:?}", self.lexer_state.cur_token()) .red() .bold() ); - self.bump_while_not(Kind::RBracket) + self.lexer_state + .bump_while_not(&self.syntax.keyword_closing[0]) } else { - // while !self.at(Kind::RBracket) { - // match self.cur_kind() { - // Kind::String => arguments.push(&self.cur_token.value), + // while !self.lexer_state.at(Kind::RBracket) { + // match self.lexer_state.cur_kind() { + // Kind::String => arguments.push(&self.lexer_state.cur_token.value), // Kind::Number => todo!(), // _ => {} // } // } loop { - match self.cur_kind() { + match self.lexer_state.cur_kind() { Kind::Space => { - self.bump_until_not_at(Kind::Space); + self.lexer_state.bump_until_not_at(&Kind::Space); } Kind::String => { - arguments.push(self.cur_token.value.clone()); - self.bump(Kind::String) + arguments.push(self.lexer_state.cur_token.value.clone()); + self.lexer_state.bump(&Kind::String) } Kind::Number => { - arguments.push(self.cur_token.value.clone()); - self.bump(Kind::Number) + arguments.push(self.lexer_state.cur_token.value.clone()); + self.lexer_state.bump(&Kind::Number) } - Kind::RBracket => { + kind if *kind == self.syntax.keyword_closing[1] => { break; } _ => { @@ -172,15 +182,15 @@ impl Parser<'_> { filters: &mut Vec, ) -> Result<(), ParseError> { // Always first string, what comes after we cant know - self.bump_while_not(Kind::String); - strings.push(self.cur_val().clone()); + self.lexer_state.bump_while_not(&Kind::String); + strings.push(self.lexer_state.cur_val().clone()); - self.bump_any(); + self.lexer_state.bump_any(); - while !&self.closed && !self.at(Kind::Eof) { - match &self.cur_kind() { + while !&self.closed && !self.lexer_state.at(&Kind::Eof) { + match &self.lexer_state.cur_kind() { Kind::Dot => { - if self.seen_dot && self.eat(Kind::Dot) { + if self.seen_dot && self.lexer_state.eat(&Kind::Dot) { self.seen_dot = false; return Err(ParseError::new_from_parser( ParseErrorTypes::DoubleDot, @@ -188,16 +198,17 @@ impl Parser<'_> { )); } else { self.seen_dot = true; - self.bump(Kind::Dot); + self.lexer_state.bump(&Kind::Dot); } } Kind::String => { if self.seen_dot { - strings.push(self.cur_token.clone().value); - self.bump(Kind::String); + strings.push(self.lexer_state.cur_token.clone().value); + self.lexer_state.bump(&Kind::String); self.seen_dot = false; } else { - self.bump_while_not(Kind::RBracket); + self.lexer_state + .bump_while_not(&self.syntax.keyword_closing[0]); return Err(ParseError::new_from_parser( ParseErrorTypes::DoubleString, &self, @@ -215,9 +226,9 @@ impl Parser<'_> { Err(e) => eprintln!("{}", e), } } - Kind::RBracket => { + kind if **kind == self.syntax.keyword_closing[0] => { return self.get_closing(); - // if self.eat(Kind::RBracket) { + // if self.lexer_state.eat(Kind::RBracket) { // self.closed = true; // self.opened = false; // println!("closed without filter") @@ -226,11 +237,11 @@ impl Parser<'_> { // break; // } } - Kind::Space => self.bump(Kind::Space), - Kind::NewLine => self.bump(Kind::NewLine), - Kind::Identifier => self.bump(Kind::Identifier), + Kind::Space => self.lexer_state.bump(&Kind::Space), + Kind::NewLine => self.lexer_state.bump(&Kind::NewLine), + Kind::Identifier => self.lexer_state.bump(&Kind::Identifier), _ => { - println!("{:?}", self.cur_token()); + println!("{:?}", self.lexer_state.cur_token()); } } } diff --git a/matugen-parser/src/parser/language.rs b/matugen-parser/src/parser/language.rs index e69de29..394679b 100644 --- a/matugen-parser/src/parser/language.rs +++ b/matugen-parser/src/parser/language.rs @@ -0,0 +1,23 @@ +use crate::lexer::Kind; + +use super::Parser; + +// impl Parser<'_> { +// pub fn get_opening_fn(&mut self) -> Option { +// let mut start = self.cur_token().start; + +// self.bump_any(); + +// while !self.opened { +// if self.eat(Kind::LessThan) { +// self.opened = true; +// self.closed = false; +// } else if self.eat(Kind::Eof) { +// return None; +// } +// self.bump_while_not(Kind::LessThan); +// start = self.cur_token().start; +// } +// Some(start) +// } +// } diff --git a/matugen-parser/src/parser/mod.rs b/matugen-parser/src/parser/mod.rs index 386b9e0..2789792 100644 --- a/matugen-parser/src/parser/mod.rs +++ b/matugen-parser/src/parser/mod.rs @@ -1,14 +1,8 @@ pub mod keywords; pub mod language; +pub mod settings; -use colored::Colorize; -use std::cell::RefCell; -use std::fmt; -use std::iter::Filter; -use std::rc::Rc; - -use crate::errors::parse::{ParseError, ParseErrorTypes}; -use crate::errors::{handle_error, handle_error_panic}; +use settings::SyntaxSettings; use crate::lexer::{Kind, Lexer, Token, TokenValue}; use crate::node::{FilterDefinition, KeywordDefinition, Node, Program, Statement}; @@ -17,66 +11,62 @@ use crate::node::{FilterDefinition, KeywordDefinition, Node, Program, Statement} pub struct Parser<'a> { pub source: &'a str, pub filename: &'a str, - pub lexer: Lexer<'a>, - - /// Current Token consumed from the lexer - pub cur_token: Token, - /// The end range of the previous token - pub prev_token_end: usize, + pub lexer_state: LexerState<'a>, pub opened: bool, pub closed: bool, pub seen_dot: bool, pub last_bracket_start: usize, + pub syntax: &'a SyntaxSettings, +} + +#[derive(Debug)] +pub struct LexerState<'a> { + pub lexer: Lexer<'a>, + pub cur_token: Token, + pub prev_token_end: usize, } impl<'a> Parser<'a> { - /// Create a new parser. - pub fn new(source: &'a str, filename: &'a str) -> Parser<'a> { + pub fn new(source: &'a str, filename: &'a str, syntax: &'a SyntaxSettings) -> Parser<'a> { let mut lexer = Lexer::new(&source); Parser { source, filename, - cur_token: lexer.start(), - lexer, - prev_token_end: 0, + lexer_state: LexerState { + cur_token: lexer.start(), + prev_token_end: 0, + lexer, + }, opened: false, closed: false, seen_dot: false, last_bracket_start: 0, + syntax: &syntax, } } pub fn parse(&mut self) -> Program { let end = self.source.len(); - let statments = self.get_keywords(); + let keyword_statements = self.get_keywords(); Program { node: Node { start: 0, end }, - body: statments, + body: keyword_statements, } } - // fn parse_keyword_statement(&mut self) -> Statement { - // let node = self.start_node(); - // // NOTE: the token returned from the lexer is `Kind::Debugger`, we'll fix this later. - // self.bump_any(); - // Statement::KeywordDeclarationStatement { - // 0: KeywordDeclaration { - // node: self.finish_node(node), - // }, - // } - // } - fn start_node(&mut self) -> Node { - let token = self.cur_token(); + let token = self.lexer_state.cur_token(); Node::new(token.start, 0) } fn finish_node(&self, node: Node) -> Node { - Node::new(node.start, self.prev_token_end) + Node::new(node.start, self.lexer_state.prev_token_end) } +} +impl LexerState<'_> { fn cur_token(&self) -> &Token { &self.cur_token } @@ -89,38 +79,33 @@ impl<'a> Parser<'a> { &self.cur_token.value } - /// Checks if the current index has token `Kind` - fn at(&self, kind: Kind) -> bool { - self.cur_kind() == &kind + fn at(&self, kind: &Kind) -> bool { + self.cur_kind() == kind } - /// Advance if we are at `Kind` - fn bump(&mut self, kind: Kind) { + fn bump(&mut self, kind: &Kind) { if self.at(kind) { self.advance(); } } - /// Advance any token fn bump_any(&mut self) { self.advance(); } - fn bump_until_not_at(&mut self, kind: Kind) { - while self.cur_kind() == &kind && !self.at(Kind::Eof) { + fn bump_until_not_at(&mut self, kind: &Kind) { + while self.cur_kind() == kind && !self.at(&Kind::Eof) { self.bump_any() } } - /// Advance any token - fn bump_while_not(&mut self, kind: Kind) { - while self.cur_kind() != &kind && !self.at(Kind::Eof) { + fn bump_while_not(&mut self, kind: &Kind) { + while self.cur_kind() != kind && !self.at(&Kind::Eof) { self.advance(); } } - /// Advance and return true if we are at `Kind`, return false otherwise - fn eat(&mut self, kind: Kind) -> bool { + fn eat(&mut self, kind: &Kind) -> bool { if self.at(kind) { self.advance(); return true; @@ -128,9 +113,8 @@ impl<'a> Parser<'a> { false } - /// Advance and return true if we are at `Kind`, return false otherwise - fn eat_ignore_spaces(&mut self, kind: Kind) -> bool { - self.bump_until_not_at(Kind::Space); + fn eat_ignore_spaces(&mut self, kind: &Kind) -> bool { + self.bump_until_not_at(&Kind::Space); if self.at(kind) { self.advance(); @@ -139,7 +123,6 @@ impl<'a> Parser<'a> { false } - /// Move to the next token fn advance(&mut self) { let token = self.lexer.next_token(); self.prev_token_end = self.cur_token.end; diff --git a/matugen-parser/src/parser/settings.rs b/matugen-parser/src/parser/settings.rs new file mode 100644 index 0000000..9547c47 --- /dev/null +++ b/matugen-parser/src/parser/settings.rs @@ -0,0 +1,61 @@ +use std::str::FromStr; + +use crate::lexer::Kind; + +use super::Parser; + +pub type Delimeter = [Kind; 2]; + +#[derive(Debug)] +pub struct SyntaxSettings { + pub keyword_opening: Delimeter, + pub keyword_closing: Delimeter, + pub function_opening: Delimeter, + pub function_closing: Delimeter, +} + +impl SyntaxSettings { + pub fn new<'a>( + keyword_opening: [char; 2], + keyword_closing: [char; 2], + function_opening: [char; 2], + function_closing: [char; 2], + ) -> SyntaxSettings { + SyntaxSettings { + keyword_opening: Kind::from_char_arr(keyword_opening), + keyword_closing: Kind::from_char_arr(keyword_closing), + function_opening: Kind::from_char_arr(function_opening), + function_closing: Kind::from_char_arr(function_closing), + } + } +} + +impl Default for SyntaxSettings { + fn default() -> SyntaxSettings { + SyntaxSettings { + keyword_opening: Kind::from_char_arr(['{', '{']), + keyword_closing: Kind::from_char_arr(['}', '}']), + function_opening: Kind::from_char_arr(['<', '*']), + function_closing: Kind::from_char_arr(['*', '>']), + } + } +} + +impl Kind { + fn from_char(c: &char) -> Kind { + match c { + '{' => Kind::LBracket, + '}' => Kind::RBracket, + '.' => Kind::Dot, + '|' => Kind::Bar, + '<' => Kind::LessThan, + '>' => Kind::GreaterThan, + '*' => Kind::Asterisk, + _ => Kind::String, + } + } + + fn from_char_arr(arr: [char; 2]) -> Delimeter { + arr.map(|c| Kind::from_char(&c)) + } +}