From f644cce2a71493d3c398196ba036241a981cf4e8 Mon Sep 17 00:00:00 2001 From: MilkeeyCat Date: Sun, 29 Dec 2024 20:50:21 +0200 Subject: [PATCH] feat: diagnostics for lexer --- src/compile.rs | 17 +++++- src/diagnostics.rs | 132 ++++++++++++++++++++++++++++++++++++++++++++ src/lexer/error.rs | 7 --- src/lexer/mod.rs | 56 +++++++++++++------ src/lib.rs | 1 + src/parser/error.rs | 4 +- src/parser/mod.rs | 53 ++++++++++++------ 7 files changed, 225 insertions(+), 45 deletions(-) create mode 100644 src/diagnostics.rs delete mode 100644 src/lexer/error.rs diff --git a/src/compile.rs b/src/compile.rs index 384a2fc..a482306 100644 --- a/src/compile.rs +++ b/src/compile.rs @@ -1,5 +1,6 @@ use crate::{ codegen::{amd64_asm::Amd64Asm, Codegen}, + diagnostics::Diagnostics, lexer::Lexer, lowering::Lowering, parser, Context, @@ -45,8 +46,20 @@ pub fn compile(args: CompileArgs) -> Result<(), Box> { file.read_to_string(&mut source_code)?; - let lexer = Lexer::new(source_code); - let ast = parser::Parser::new(lexer)?.parse()?; + let report_diag_and_exit = |diag: &Diagnostics| { + println!("{diag}"); + + std::process::exit(0x45); + }; + + let mut diagnostics = Diagnostics::new(&source_code); + let lexer = Lexer::new(&source_code); + let ast = parser::Parser::new(lexer, &mut diagnostics)?.parse()?; + + if diagnostics.has_errors() { + report_diag_and_exit(&mut diagnostics); + } + let allocator = Bump::new(); let mut ctx = Context::new(&allocator); diff --git a/src/diagnostics.rs b/src/diagnostics.rs new file mode 100644 index 0000000..358f464 --- /dev/null +++ b/src/diagnostics.rs @@ -0,0 +1,132 @@ +use crate::lexer::span::Span; +use derive_more::derive::Display; + +#[derive(Debug, Display)] +pub enum Diagnostic { + #[display("syntax error: unknown character")] + UnknownChar, +} + +#[derive(Debug, Eq, PartialEq, Display)] +enum Level { + #[display("error")] + Error, + #[display("warning")] + Warning, +} + +#[derive(Debug)] +struct Message { + level: Level, + diag: Diagnostic, + span: Span, +} + +#[derive(Debug)] +pub struct Diagnostics<'src> { + source: &'src str, + messages: Vec, +} + +impl<'src> Diagnostics<'src> { + pub fn new(source: &'src str) -> Self { + Self { + source, + messages: Vec::new(), + } + } + + pub fn error(&mut self, diag: Diagnostic, span: Span) { + self.messages.push(Message { + level: Level::Error, + diag, + span, + }) + } + + pub fn warning(&mut self, diag: Diagnostic, span: Span) { + self.messages.push(Message { + level: Level::Warning, + diag, + span, + }) + } + + pub fn has_errors(&self) -> bool { + self.messages.iter().any(|msg| msg.level == Level::Error) + } + + fn row(&self, col: usize) -> usize { + self.source[..col].chars().filter(|ch| ch == &'\n').count() + } + + fn column(&self, row: usize) -> usize { + self.source[..row] + .chars() + .rev() + .enumerate() + .find_map(|(i, ch)| if ch == '\n' { Some(i) } else { None }) + .unwrap_or(row) + } + + fn lines(&self, span: &Span) -> usize { + self.source[..span.end - span.start] + .chars() + .filter(|ch| ch == &'\n') + .count() + + 1 + } +} + +impl std::fmt::Display for Diagnostics<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + const RED_FG: &'static str = "\x1b[1;31m"; + const YELLOW_FG: &'static str = "\x1b[1;33m"; + const BLUE_FG: &'static str = "\x1b[1;34m"; + const RESET: &'static str = "\x1b[0m"; + + for message in &self.messages { + let color = match message.level { + Level::Error => RED_FG, + Level::Warning => YELLOW_FG, + }; + + writeln!(f, "{color}{}{RESET}: {}", message.level, message.diag)?; + + let col = self.row(message.span.start); + let row = self.column(message.span.start); + + writeln!(f, "notarealfilename.rs:{}:{}", col + 1, row + 1)?; + + if self.lines(&message.span) > 1 { + todo!("Dunno how to handle multiline diagnostics"); + } else { + writeln!( + f, + "{BLUE_FG}{} |{RESET} {}", + col + 1, + self.source.lines().nth(col).unwrap() + )?; + writeln!( + f, + "{}{}", + (0..4).into_iter().map(|_| " ").collect::(), + (0..self.column(message.span.end)) + .into_iter() + .map(|i| { + if (self.column(message.span.start)..self.column(message.span.end)) + .contains(&i) + { + format!("{color}^{RESET}") + } else { + " ".into() + } + }) + .collect::() + )?; + } + } + + Ok(()) + } +} diff --git a/src/lexer/error.rs b/src/lexer/error.rs deleted file mode 100644 index 939b801..0000000 --- a/src/lexer/error.rs +++ /dev/null @@ -1,7 +0,0 @@ -use thiserror::Error; - -#[derive(Error, Debug)] -pub enum Error { - #[error("Failed to parse char {0}")] - UnknownCharacter(char), -} diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index aa4aaaa..e225317 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -1,24 +1,39 @@ -mod error; mod token; -pub use error::Error; +use span::Span; pub use token::Token; +pub mod span { + #[derive(Debug)] + pub struct Span { + pub start: usize, + pub end: usize, + } + + #[derive(Debug)] + pub struct Spanned { + pub span: Span, + pub inner: T, + } +} + #[derive(Debug)] -pub struct Lexer { - input: String, +pub struct Lexer<'src> { + input: &'src str, position: usize, read_position: usize, ch: char, + start: usize, } -impl Lexer { - pub fn new(input: String) -> Self { +impl<'src> Lexer<'src> { + pub fn new(input: &'src str) -> Self { let mut lexer = Self { input, ch: '\0', position: 0, read_position: 0, + start: 0, }; lexer.read_char(); @@ -26,6 +41,8 @@ impl Lexer { } fn read_char(&mut self) { + self.start = self.position; + match self.input[self.read_position..].chars().next() { Some(ch) => { self.ch = ch; @@ -85,10 +102,17 @@ impl Lexer { self.read_char(); } } + + fn span(&self) -> Span { + Span { + start: self.start, + end: self.position, + } + } } -impl Iterator for Lexer { - type Item = Result; +impl<'src> Iterator for Lexer<'src> { + type Item = Result; fn next(&mut self) -> Option { self.skip_whitespace(); @@ -218,8 +242,10 @@ impl Iterator for Lexer { _ => Token::Ident(ident), })); } - ch => { - return Some(Err(Error::UnknownCharacter(ch))); + _ => { + self.read_char(); + + return Some(Err(self.span())); } }; @@ -231,11 +257,11 @@ impl Iterator for Lexer { #[cfg(test)] mod test { - use super::{Error, Lexer}; + use super::Lexer; use crate::lexer::Token; #[test] - fn source_into_tokens() -> Result<(), Error> { + fn source_into_tokens() { let input = r#" ident 69 @@ -369,14 +395,12 @@ mod test { Token::Null, ]; - let mut lexer = Lexer::new(input.to_string()); + let mut lexer = Lexer::new(input); for token in tokens { - let next_token = lexer.next().unwrap()?; + let next_token = lexer.next().unwrap().unwrap(); assert_eq!(token, next_token); } - - Ok(()) } } diff --git a/src/lib.rs b/src/lib.rs index fd440b2..6a3f532 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,5 +1,6 @@ pub mod codegen; pub mod compile; +pub mod diagnostics; pub mod ir; pub mod lexer; pub mod lowering; diff --git a/src/parser/error.rs b/src/parser/error.rs index c5b5244..d53e61b 100644 --- a/src/parser/error.rs +++ b/src/parser/error.rs @@ -1,11 +1,9 @@ use super::{OpParseError, Ty}; -use crate::lexer::{self, Token}; +use crate::lexer::Token; use thiserror::Error; #[derive(Error, Debug)] pub enum Error { - #[error(transparent)] - Lexer(#[from] lexer::Error), #[error(transparent)] Type(#[from] TyError), #[error(transparent)] diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 4a96633..15180f6 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -7,7 +7,10 @@ mod types; pub mod expr; -use crate::lexer::{self, Token}; +use crate::{ + diagnostics::{Diagnostic, Diagnostics}, + lexer::{self, span::Span, Token}, +}; pub use error::{Error, TyError}; pub use expr::*; pub use item::{Item, ItemFn, ItemStruct}; @@ -27,23 +30,25 @@ pub struct Variable { #[derive(Debug, Clone, PartialEq)] pub struct Block(pub Vec); -type PrefixFn = fn(&mut Parser) -> Result; -type InfixFn = fn(&mut Parser, left: Expr) -> Result; +type PrefixFn<'a, 'src, T> = fn(&mut Parser<'a, 'src, T>) -> Result; +type InfixFn<'a, 'src, T> = fn(&mut Parser<'a, 'src, T>, left: Expr) -> Result; -pub struct Parser>> { +pub struct Parser<'a, 'src, T: Iterator>> { lexer: T, + diag: &'a mut Diagnostics<'src>, cur_token: Option, peek_token: Option, - prefix_fns: HashMap>, - infix_fns: HashMap>, + prefix_fns: HashMap>, + infix_fns: HashMap>, } -impl>> Parser { - pub fn new(mut lexer: T) -> Result { - Ok(Self { - cur_token: lexer.next().transpose()?, - peek_token: lexer.next().transpose()?, +impl<'a, 'src, T: Iterator>> Parser<'a, 'src, T> { + pub fn new(mut lexer: T, diag: &'a mut Diagnostics<'src>) -> Result { + let mut parser = Self { + cur_token: None, + peek_token: None, lexer, + diag, prefix_fns: HashMap::from([ (Token::Ident(Default::default()), Self::ident as PrefixFn), (Token::String(Default::default()), Self::string_lit), @@ -85,16 +90,28 @@ impl>> Parser { (Token::LParen, Self::func_call_expr), (Token::Bang, Self::macro_call_expr), ]), - }) + }; + + parser.next_token()?; + parser.next_token()?; + + Ok(parser) } fn next_token(&mut self) -> Result, Error> { - let mut token = self.lexer.next().transpose()?; + match self.lexer.next().transpose() { + Ok(mut token) => { + std::mem::swap(&mut self.cur_token, &mut self.peek_token); + std::mem::swap(&mut token, &mut self.peek_token); - std::mem::swap(&mut self.cur_token, &mut self.peek_token); - std::mem::swap(&mut token, &mut self.peek_token); + Ok(token) + } + Err(span) => { + self.diag.error(Diagnostic::UnknownChar, span); - Ok(token) + self.next_token() + } + } } fn cur_token_is(&self, token: &Token) -> bool { @@ -786,6 +803,7 @@ impl>> Parser { mod test { use super::Parser; use crate::{ + diagnostics::Diagnostics, lexer::Lexer, parser::{ BinOp, Error, Expr, ExprBinary, ExprCast, ExprIdent, ExprLit, ExprUnary, IntTy, Stmt, @@ -943,7 +961,8 @@ mod test { ]; for (input, expected) in tests { - let mut parser = Parser::new(Lexer::new(input.to_string())).unwrap(); + let mut diagnostics = Diagnostics::new(input); + let mut parser = Parser::new(Lexer::new(input), &mut diagnostics).unwrap(); let ast = parser.compound_statement().unwrap(); assert_eq!(