From 08d7a0da792affea970c1e66d79f9e6bbb24d8bb Mon Sep 17 00:00:00 2001 From: Cr0a3 <127748753+Cr0a3@users.noreply.github.com> Date: Tue, 3 Sep 2024 18:09:57 +0000 Subject: [PATCH] [IR] trying to fix a bug --- src/IR/parser/lexer.rs | 77 ++++++++++++--- src/IR/parser/mod.rs | 53 +++++++++- src/IR/parser/parser.rs | 208 +++++++++++++++++++++++++++++++++++++++- src/IR/typ.rs | 17 ++++ test.ylc | 4 +- tools/ylc/main.rs | 2 +- 6 files changed, 340 insertions(+), 21 deletions(-) diff --git a/src/IR/parser/lexer.rs b/src/IR/parser/lexer.rs index 66f9767e..076d6bb0 100644 --- a/src/IR/parser/lexer.rs +++ b/src/IR/parser/lexer.rs @@ -71,6 +71,30 @@ pub enum TokenType { Func(String), } +impl TokenType { + pub(crate) fn name(&self) -> String { + match self { + TokenType::Dot => "dot", + TokenType::Const => "const", + TokenType::Comma => "comma", + TokenType::Var(_) => "var", + TokenType::Equal => "equal", + TokenType::LParam => "lparam", + TokenType::RParam => "rparam", + TokenType::LBracket => "lbracket", + TokenType::RBracket => "rbracket", + TokenType::LSquare => "lsquare", + TokenType::RSquare => "rsquare", + TokenType::Ident(_) => "ident", + TokenType::String(_) => "string", + TokenType::Int(_) => "int", + TokenType::Declare => "declare", + TokenType::Define => "define", + TokenType::Func(_) => "func", + }.to_string() + } +} + /// An ir token #[derive(Debug, Clone, PartialEq, Eq)] pub struct Token { @@ -96,6 +120,8 @@ pub struct IrLexer { loc: Loc, + no_pop: bool, + keywords: HashMap, /// The output @@ -129,11 +155,13 @@ impl IrLexer { }, out: vec![], + + no_pop: false, } } fn is_at_end(&self) -> bool { - self.current >= (self.input_stream.chars().count() - 1) as u64 + self.current >= (self.input_stream.chars().count()) as u64 } fn update_loc(&mut self) { @@ -149,6 +177,9 @@ impl IrLexer { } fn advance(&mut self) -> Result { + if !self.no_pop { + self.current += 1; + } self.current += 1; let peek = self.peek(); @@ -156,18 +187,24 @@ impl IrLexer { if let Some(peek) = peek { if peek == '\n' { - self.coloumn = 1; + self.coloumn = 0; self.line_no += 1; self.update_line_string(); } else { - self.coloumn += 1; + if !self.no_pop { + self.coloumn += 1; + } out = peek; } } else { Err(IrError::OutOfChars)? } + if self.no_pop { + self.no_pop = false; + } + self.loc.length = self.current - self.start - 1; Ok(out) @@ -201,6 +238,7 @@ impl IrLexer { '(' => ty = Some(TokenType::LParam), '{' => ty = Some(TokenType::LBracket), '[' => ty = Some(TokenType::LSquare), + ')' => ty = Some(TokenType::RParam), '}' => ty = Some(TokenType::RBracket), ']' => ty = Some(TokenType::RSquare), @@ -261,6 +299,10 @@ impl IrLexer { _ => looping = false, } + + if looping { + self.advance()?; + } } Ok(TokenType::Var(out)) @@ -286,6 +328,10 @@ impl IrLexer { _ => out.push(chr), } + + if looping { + self.advance()?; + } } Ok(TokenType::String(out)) @@ -343,7 +389,7 @@ impl IrLexer { })? } - let chr = self.advance()?; + let chr = self.peek().unwrap(); match chr { '0'..='9' => string.push(chr), @@ -352,6 +398,10 @@ impl IrLexer { _ => looping = false, } + + if looping { + self.advance()?; + } } let mut negate = false; @@ -380,8 +430,6 @@ impl IrLexer { fn scan_func(&mut self) -> Result { let mut out = String::new(); - out.push( self.peek().unwrap() ); - let mut looping = true; while looping { @@ -392,18 +440,25 @@ impl IrLexer { })? } - let chr = self.advance()?; + let chr = self.peek().unwrap(); match chr { - '0'..='9' => out.push(chr), - 'a'..='z' => out.push(chr), - 'A'..='Z' => out.push(chr), - '_' => out.push(chr), + '0'..='9' => out.push( chr ), + 'a'..='z' => out.push( chr ), + 'A'..='Z' => out.push( chr ), + '@' => out.push('@'), + '_' => out.push( '_' ), _ => looping = false, } + + if looping { + self.advance()?; + } } + self.no_pop = true; + Ok(TokenType::Func(out)) } } \ No newline at end of file diff --git a/src/IR/parser/mod.rs b/src/IR/parser/mod.rs index e32367a9..d5deae8c 100644 --- a/src/IR/parser/mod.rs +++ b/src/IR/parser/mod.rs @@ -56,19 +56,36 @@ pub enum IrError { loc: Loc, /// The box of the error err: Box, - } + }, + + /// Expected token + ExpectedTokenButFoundAnUnexpectedOne{ + /// the token which was found + found: lexer::Token, + /// the token which was expected + expected: lexer::Token + }, + + /// A unkown type + UnkownType(lexer::Token), } impl Display for IrError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "{}", match self { IrError::UnexpectedToken(token) => { - let mut fab = Support::Error::new("", "", token.loc.line.to_string(), token.loc.coloumn.to_string()); + let mut fab = Support::Error::new("unexpected token", "", token.loc.line.to_string(), token.loc.coloumn.to_string()); fab.deactivateLocationDisplay(); fab.setCodeLine(token.loc.line_string.to_string()); - fab.addWhere("unexpected token", token.loc.coloumn, token.loc.length); + let mut length = token.loc.length; + + if 0 == length { + length = 1; + } + + fab.addWhere("unexpected token", token.loc.coloumn, length); fab.to_string() }, @@ -122,6 +139,36 @@ impl Display for IrError { fab.to_string() } + + IrError::ExpectedTokenButFoundAnUnexpectedOne{found, expected} => { + let mut fab = Support::Error::new("expected a specific token but found another one", "", found.loc.line.to_string(), found.loc.coloumn.to_string()); + + fab.deactivateLocationDisplay(); + + fab.setCodeLine(found.loc.line_string.to_string()); + + let mut length = found.loc.length; + + if 0 == length { + length = 1; + } + + fab.addWhere(format!("expected following token: {:?} but found {:?}", expected.typ.name(), found.typ), found.loc.coloumn, length); + + fab.to_string() + } + + IrError::UnkownType(typ) => { + let mut fab = Support::Error::new("unknown type", "", typ.loc.line.to_string(), typ.loc.coloumn.to_string()); + + fab.deactivateLocationDisplay(); + + fab.setCodeLine(typ.loc.line_string.to_string()); + fab.addWhere("unkown type", typ.loc.coloumn, typ.loc.length); + + fab.to_string() + + } }) } } diff --git a/src/IR/parser/parser.rs b/src/IR/parser/parser.rs index 635af035..0ed31858 100644 --- a/src/IR/parser/parser.rs +++ b/src/IR/parser/parser.rs @@ -1,6 +1,7 @@ -use std::collections::VecDeque; +use std::collections::{HashMap, VecDeque}; use crate::prelude::Ir; +use crate::IR::TypeMetadata; use super::lexer::{Loc, Token, TokenType}; use super::IrError; @@ -11,9 +12,9 @@ use super::IrError; #[allow(missing_docs)] pub enum IrStmt { /// a function - Func{body: Vec>, location: Loc}, + Func{name: String, ret: TypeMetadata, args: HashMap, body: Vec<(Box, Loc)>}, /// a constant - Const{data: Vec, location: Loc}, + Const{name: String, data: Vec, location: Loc}, } /// Parses ir tokens into ir statements with location data @@ -65,10 +66,209 @@ impl IrParser { } fn parse_define(&mut self) -> Result { - todo!() + let name; + let mut body= vec![]; + let mut args = HashMap::new(); + + + self.expect( TokenType::Define )?; + self.input.pop_front(); // advance over define + + let ret = self.parse_type()?; + self.input.pop_front(); + + self.expect( TokenType::Func(String::new()) )?; + + let tok = self.current_token()?; + if let TokenType::Func(func) = &tok.typ { + name = func.to_string(); + } else { unreachable!() } + + self.input.pop_front(); + self.expect( TokenType::LParam )?; + + self.input.pop_front(); + + self.expect(TokenType::LBracket)?; + self.input.pop_front(); + + loop { + let current = self.current_token()?; + + if TokenType::RParam == current.typ { + break; + } + + let var_type = self.parse_type()?; + self.input.pop_front(); + + self.expect( TokenType::Var(String::new()) )?; + + let token = self.current_token()?; + + let var_name = match &token.typ { + TokenType::Var(name) => name.to_string(), + + _=> Err(IrError::UndeterminedTokenSequence { + loc: token.loc.clone(), + expected: String::from("%s for a valid variable"), + })? + }; + + args.insert(var_name, var_type ); + } + + loop { + let current = self.current_token()?; + + if TokenType::RBracket == current.typ { + break; + } + + body.push( self.parse_instruction()? ); + } + + Ok(IrStmt::Func { + name: name, + body: body, + args: args, + ret: ret, + }) } fn parse_const(&mut self) -> Result { + self.expect(TokenType::Const)?; + + self.input.pop_front(); + + /* + PARSE NAME + */ + + let name; + + let mut location; + + self.expect( TokenType::Ident(String::new()) )?; + + let tok = self.current_token()?; + if let TokenType::Ident(ident) = &tok.typ { + name = ident.to_string(); + location = tok.loc.clone(); + } else { unreachable!() } + + self.input.pop_front(); + + self.expect(TokenType::Equal)?; + self.input.pop_front(); + + let current = self.current_token()?; + + let mut data = vec![]; + + match ¤t.typ { + TokenType::String(x) => data = x.as_bytes().to_vec(), + TokenType::LSquare => data = self.parse_data_array()?, + + _=> Err(IrError::ExpectedTokenButFoundAnUnexpectedOne { + found: current.clone(), + expected: Token { + typ: TokenType::LSquare, + loc: current.loc.clone() + } + })? + }; + + let current = &self.current_token()?.loc; + + if location.line == current.line { + location.length = current.coloumn - location.coloumn; + } + + self.input.pop_front(); + + Ok(IrStmt::Const { + name: name, + data: data, + location: location, + }) + } + + fn parse_instruction(&mut self) -> Result<(Box, Loc), IrError> { todo!() } + + fn parse_data_array(&mut self) -> Result, IrError> { + self.expect(TokenType::LSquare)?; + self.input.pop_front(); + + let mut data = vec![]; + + loop { + let current = self.current_token()?; + + match ¤t.typ { + TokenType::Int(int) => data.push(*int as u8), + + TokenType::RSquare => break, + _ => Err(IrError::UnexpectedToken(current.clone()))?, + }; + + self.input.pop_front(); + + let current = self.current_token()?; + + if TokenType::Comma == current.typ { + self.input.pop_front(); + } + } + + Ok(data) + } + + fn current_token(&self) -> Result<&Token, IrError> { + if let Some(token) = self.input.front() { + Ok(token) + } else { Err(IrError::OutOfTokens) } + } + + fn expect(&mut self, typ: TokenType) -> Result { + let token = self.current_token()?; + + if typ.name() == token.typ.name() { + Ok(token.clone()) + } else { + Err(IrError::ExpectedTokenButFoundAnUnexpectedOne { + found: token.clone(), + expected: Token { + typ: typ, + loc: token.loc.clone() + } + })? + } + } + + fn parse_type(&mut self) -> Result { + let token = self.current_token()?; + + let mut ident = String::new(); + + if let TokenType::Ident(text) = &token.typ { + ident = text.to_string(); + } else { + Err(IrError::ExpectedTokenButFoundAnUnexpectedOne { + found: token.clone(), + expected: Token { + typ: TokenType::Ident("abc".to_string()), + loc: token.loc.clone() + } + })? + } + + if let Some(typ) = TypeMetadata::parse(ident) { + Ok(typ) + } else { + Err(IrError::UnkownType(token.clone()) ) + } + } } \ No newline at end of file diff --git a/src/IR/typ.rs b/src/IR/typ.rs index 6db6c2b3..ef7c68c8 100644 --- a/src/IR/typ.rs +++ b/src/IR/typ.rs @@ -95,6 +95,23 @@ impl TypeMetadata { _ => false, } } + + /// returns the parsed typemetadata + pub fn parse(string: String) -> Option { + match string.as_str() { + "u16" => Some(TypeMetadata::u16), + "u32" => Some(TypeMetadata::u32), + "u64" => Some(TypeMetadata::u64), + + "i16" => Some(TypeMetadata::i16), + "i32" => Some(TypeMetadata::i32), + "i64" => Some(TypeMetadata::i64), + + "void" => Some(TypeMetadata::Void), + + _ => None, + } + } } impl Display for Type { diff --git a/test.ylc b/test.ylc index 02bae511..7576ed07 100644 --- a/test.ylc +++ b/test.ylc @@ -1,7 +1,7 @@ -const .const0 = [ 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, ,] +const const0 = [ 72, 101, 108, 108, 111, 32, 87, 111, 114, 108, 100, 33, ,] define void @main() { entry: - %0 = ptr .const0 + %0 = ptr const0 %1 = call void printf ptr %0 ret void 0 diff --git a/tools/ylc/main.rs b/tools/ylc/main.rs index 78c32e31..891155d0 100644 --- a/tools/ylc/main.rs +++ b/tools/ylc/main.rs @@ -63,7 +63,7 @@ fn main() -> Result<(), Box> { } if cli.opt("lex") { - println!("Tokens: {:#?}", lexer.out); + println!("Tokens: {:?}", lexer.out); }