From ca7df86b49f3dd7d43c40a2050b85bc8fe1391a0 Mon Sep 17 00:00:00 2001 From: Herman Skogseth Date: Wed, 12 Mar 2025 18:12:36 +0100 Subject: [PATCH 1/4] Lexer --- src/lexer.rs | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/lexer.rs b/src/lexer.rs index fa893c7..4310853 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -148,6 +148,9 @@ impl> Iterator for Lexer { None => Token::Operator(Operator::GreaterThan).with_span(Span::single(c)), }, + '?' => Token::Operator(Operator::QuestionMark).with_span(Span::single(c)), + ':' => Token::Operator(Operator::Colon).with_span(Span::single(c)), + _ => { return Some(Err(LexerError { message: "not a valid token", @@ -220,6 +223,8 @@ pub enum Keyword { Int, Void, Return, + If, + Else, } impl std::fmt::Display for Keyword { @@ -228,6 +233,8 @@ impl std::fmt::Display for Keyword { Self::Int => f.write_str("int"), Self::Void => f.write_str("void"), Self::Return => f.write_str("return"), + Self::If => f.write_str("if"), + Self::Else => f.write_str("else"), } } } @@ -239,6 +246,8 @@ impl FromStr for Keyword { "int" => Ok(Self::Int), "void" => Ok(Self::Void), "return" => Ok(Self::Return), + "if" => Ok(Self::If), + "else" => Ok(Self::Else), _ => Err(()), } } @@ -269,6 +278,9 @@ pub enum Operator { GreaterThan, // > LessOrEqual, // <= GreaterOrEqual, // <= + + QuestionMark, // ? + Colon, // : } impl std::fmt::Display for Operator { @@ -296,6 +308,9 @@ impl std::fmt::Display for Operator { Self::GreaterThan => f.write_str(">"), Self::LessOrEqual => f.write_str("<="), Self::GreaterOrEqual => f.write_str("<="), + + Self::QuestionMark => f.write_str("?"), + Self::Colon => f.write_str(":"), } } } From f30ca41082a3b67a1251043521e5840cf3e2c485 Mon Sep 17 00:00:00 2001 From: Herman Skogseth Date: Wed, 12 Mar 2025 20:08:47 +0100 Subject: [PATCH 2/4] Parser --- src/lexer.rs | 37 ++++++---- src/parser.rs | 178 ++++++++++++++++++++++++++++++------------------ src/tacky.rs | 4 +- src/validate.rs | 7 ++ 4 files changed, 147 insertions(+), 79 deletions(-) diff --git a/src/lexer.rs b/src/lexer.rs index 4310853..e0b2634 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -84,11 +84,11 @@ impl> Iterator for Lexer { Some(Ok(TokenElem { token, span })) } else { let token_elem: TokenElem = match c.char { - '(' => Token::OpenParenthesis.with_span(Span::single(c)), - ')' => Token::CloseParenthesis.with_span(Span::single(c)), - '{' => Token::OpenBrace.with_span(Span::single(c)), - '}' => Token::CloseBrace.with_span(Span::single(c)), - ';' => Token::Semicolon.with_span(Span::single(c)), + '(' => Token::Punct(Punct::OpenParenthesis).with_span(Span::single(c)), + ')' => Token::Punct(Punct::CloseParenthesis).with_span(Span::single(c)), + '{' => Token::Punct(Punct::OpenBrace).with_span(Span::single(c)), + '}' => Token::Punct(Punct::CloseBrace).with_span(Span::single(c)), + ';' => Token::Punct(Punct::Semicolon).with_span(Span::single(c)), // This can be either plus ('+') or increment ('++') '+' => match self.0.next_if(|c| c.char == '+') { @@ -187,13 +187,9 @@ pub struct LexerError { pub enum Token { Identifier(Identifier), Constant(i64), // TODO: Maybe a custom constant type? + Punct(Punct), Keyword(Keyword), Operator(Operator), - OpenParenthesis, - CloseParenthesis, - OpenBrace, - CloseBrace, - Semicolon, } impl Token { @@ -209,6 +205,23 @@ impl std::fmt::Display for Token { Self::Constant(i) => write!(f, "constant with value `{i}`"), Self::Keyword(keyword) => write!(f, "keyword `{keyword}`"), Self::Operator(op) => write!(f, "`{op}` (operator)"), + Self::Punct(p) => write!(f, "{p}"), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum Punct { + OpenParenthesis, + CloseParenthesis, + OpenBrace, + CloseBrace, + Semicolon, +} + +impl std::fmt::Display for Punct { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { Self::OpenParenthesis => f.write_str("`(`"), Self::CloseParenthesis => f.write_str("`)`"), Self::OpenBrace => f.write_str("`{`"), @@ -328,10 +341,10 @@ mod tests { Token::Keyword(Keyword::Return), Token::Identifier(Identifier::new("var_1")), Token::Operator(Operator::Asterisk), - Token::OpenParenthesis, + Token::Punct(Punct::OpenParenthesis), Token::Operator(Operator::Minus), Token::Constant(490), - Token::CloseParenthesis, + Token::Punct(Punct::CloseParenthesis), ]; assert_eq!(tokens, expected); diff --git a/src/parser.rs b/src/parser.rs index 636b972..80a16cc 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,7 +1,7 @@ use std::iter::Peekable; use std::vec::IntoIter; -use crate::lexer::{Keyword, Operator, Token, TokenElem}; +use crate::lexer::{Keyword, Operator, Punct, Token, TokenElem}; use crate::{Identifier, Output, Span}; pub fn parse(tokens: Vec, output: &Output) -> Result { @@ -26,6 +26,38 @@ pub enum ParseError { EarlyEnd(&'static str), } +fn take_punct(tokens: &mut TokenIter, expected: Punct, output: &Output) -> Result<(), ParseError> { + let elem = tokens + .next() + .ok_or(ParseError::EarlyEnd("close-parenthesis"))?; + + match elem.token { + Token::Punct(p) if p == expected => Ok(()), + _ => { + output.error(elem.span, format!("expected {expected}")); + Err(ParseError::BadTokens) + } + } +} + +fn take_operator( + tokens: &mut TokenIter, + expected: Operator, + output: &Output, +) -> Result<(), ParseError> { + let elem = tokens + .next() + .ok_or(ParseError::EarlyEnd("close-parenthesis"))?; + + match elem.token { + Token::Operator(o) if o == expected => Ok(()), + _ => { + output.error(elem.span, format!("expected {expected}")); + Err(ParseError::BadTokens) + } + } +} + #[derive(Debug, Clone)] pub struct Program(pub Function); @@ -57,16 +89,7 @@ impl Function { return Err(ParseError::BadTokens); }; - let TokenElem { token, span } = tokens - .next() - .ok_or(ParseError::EarlyEnd("open-parenthesis"))?; - let Token::OpenParenthesis = token else { - output.error( - span, - format!("expected open-parenthesis for function {name}, found {token}"), - ); - return Err(ParseError::BadTokens); - }; + take_punct(tokens, Punct::OpenParenthesis, output)?; assert!(matches!( tokens.next(), @@ -76,32 +99,15 @@ impl Function { }) )); - let TokenElem { token, span } = tokens - .next() - .ok_or(ParseError::EarlyEnd("close-parenthesis"))?; - let Token::CloseParenthesis = token else { - output.error( - span, - format!("expected close-parenthesis for function {name}, found {token}"), - ); - return Err(ParseError::BadTokens); - }; - - let TokenElem { token, span } = tokens.next().ok_or(ParseError::EarlyEnd("open brace"))?; - let Token::OpenBrace = token else { - output.error( - span, - format!("expected open brace for function {name}, found {token}"), - ); - return Err(ParseError::BadTokens); - }; + take_punct(tokens, Punct::CloseParenthesis, output)?; + take_punct(tokens, Punct::OpenBrace, output)?; let mut body = Vec::new(); let mut encountered_bad_tokens = false; loop { let token_elem = tokens.peek().ok_or(ParseError::EarlyEnd("close brace"))?; match token_elem.token { - Token::CloseBrace => break, + Token::Punct(Punct::CloseBrace) => break, _ => match BlockItem::parse(tokens, output) { Ok(block_item) => body.push(block_item), Err(ParseError::BadTokens) => encountered_bad_tokens = true, @@ -114,8 +120,7 @@ impl Function { return Err(ParseError::BadTokens); } - let TokenElem { token, .. } = tokens.next().expect("close brace"); - assert!(matches!(token, Token::CloseBrace)); + take_punct(tokens, Punct::CloseBrace, output)?; Ok(Function { name, body }) } @@ -194,8 +199,12 @@ impl Declaration { .next() .ok_or(ParseError::EarlyEnd("either assignment or semicolon"))?; match token_elem.token { - Token::Operator(Operator::Assignment) => Some(take_expr(tokens, output)?), - Token::Semicolon => None, + Token::Operator(Operator::Assignment) => { + let expr = take_expr(tokens, output)?; + take_punct(tokens, Punct::Semicolon, output)?; + Some(expr) + } + Token::Punct(Punct::Semicolon) => None, _ => { output.error( token_elem.span, @@ -229,28 +238,27 @@ impl Declaration { pub enum Statement { Return(Expression), Expression(Expression), + If { + cond: Expression, + then: Box, + else_: Option>, + }, Null, } fn take_expr(tokens: &mut TokenIter, output: &Output) -> Result { match Expression::parse(tokens, output) { - Ok(expr) => { - let next = tokens.next().ok_or(ParseError::EarlyEnd("statement"))?; - match next.token { - Token::Semicolon => return Ok(expr), - _ => output.error(next.span, String::from("expected semicolon")), - } - } + Ok(expr) => return Ok(expr), Err(ParseError::BadTokens) => {} Err(e @ ParseError::EarlyEnd(_)) => return Err(e), - }; + } // We know keep taking tokens until we find a semicolon let mut span = None; loop { let elem = tokens.next().ok_or(ParseError::EarlyEnd("statement"))?; - if matches!(elem.token, Token::Semicolon) { + if matches!(elem.token, Token::Punct(Punct::Semicolon)) { if let Some(current_span) = span { output.warning(current_span, String::from("not parsed")); } @@ -276,16 +284,36 @@ impl Statement { Token::Keyword(Keyword::Return) => { let _ = tokens.next().expect("token must be return keyword"); let expr = take_expr(tokens, output)?; + take_punct(tokens, Punct::Semicolon, output)?; Ok(Self::Return(expr)) } - Token::Semicolon => { + Token::Punct(Punct::Semicolon) => { let _ = tokens.next().expect("token must be semicolon"); Ok(Self::Null) } + Token::Keyword(Keyword::If) => { + let _ = tokens.next().expect("token must be return keyword"); + + take_punct(tokens, Punct::OpenParenthesis, output)?; + let cond = take_expr(tokens, output)?; + take_punct(tokens, Punct::CloseParenthesis, output)?; + + let then = Box::new(Statement::parse(tokens, output)?); + + let else_ = tokens + .next_if(|t| matches!(t.token, Token::Keyword(Keyword::Else))) + .map(|_t| Statement::parse(tokens, output)) + .transpose()? + .map(Box::new); + + Ok(Self::If { cond, then, else_ }) + } + _ => { let expr = take_expr(tokens, output)?; + take_punct(tokens, Punct::Semicolon, output)?; Ok(Self::Expression(expr)) } } @@ -302,6 +330,7 @@ impl Statement { let _ = expr.emit_tacky(instructions); } Self::Null => {} // nothing to do + Self::If { cond, then, else_ } => todo!(), } } } @@ -319,6 +348,11 @@ pub enum Expression { Unary(UnaryOperator, Box), Binary(BinaryOperator, Box, Box), Assignment(Box, Box), + Conditional { + cond: Box, + if_true: Box, + if_false: Box, + }, } impl Expression { @@ -349,6 +383,16 @@ impl Expression { let right = Self::parse_expr(tokens, precedence, output)?; left = Expression::Assignment(Box::new(left), Box::new(right)); } + BinaryOperator::QuestionMark => { + let middle = Self::parse_expr(tokens, 0, output)?; + take_operator(tokens, Operator::Colon, output)?; + let right = Self::parse_expr(tokens, precedence, output)?; + left = Self::Conditional { + cond: Box::new(left), + if_true: Box::new(middle), + if_false: Box::new(right), + }; + } _ => { let right = Self::parse_expr(tokens, precedence + 1, output)?; left = Expression::Binary(bin_op, Box::new(left), Box::new(right)); @@ -386,22 +430,10 @@ impl Expression { Box::new(Self::parse_factor(tokens, output)?), )), - Token::OpenParenthesis => { + Token::Punct(Punct::OpenParenthesis) => { let expr = Self::parse_expr(tokens, 0, output)?; - - let next_token = tokens - .next() - .ok_or(ParseError::EarlyEnd("close parenthesis"))?; - match next_token.token { - Token::CloseParenthesis => Ok(expr), - t => { - output.error( - next_token.span, - format!("expected close parenthesis, found {t}"), - ); - Err(ParseError::BadTokens) - } - } + take_punct(tokens, Punct::CloseParenthesis, output)?; + Ok(expr) } _ => { @@ -538,6 +570,12 @@ impl Expression { crate::tacky::Value::Variable(var) } + + Self::Conditional { + cond, + if_true, + if_false, + } => todo!(), } } } @@ -582,6 +620,9 @@ pub enum BinaryOperator { LessOrEqual, GreaterThan, GreaterOrEqual, + + // Conditional + QuestionMark, } impl BinaryOperator { @@ -609,6 +650,9 @@ impl BinaryOperator { Token::Operator(Operator::GreaterThan) => Some(BinaryOperator::GreaterThan), Token::Operator(Operator::GreaterOrEqual) => Some(BinaryOperator::GreaterOrEqual), + // Conditional + Token::Operator(Operator::QuestionMark) => Some(BinaryOperator::QuestionMark), + _ => None, } } @@ -621,6 +665,7 @@ impl BinaryOperator { Self::Equal | Self::NotEqual => 30, Self::And => 10, Self::Or => 5, + Self::QuestionMark => 3, Self::Assignment => 1, } } @@ -643,6 +688,7 @@ impl std::fmt::Display for BinaryOperator { Self::LessOrEqual => f.write_str("<="), Self::GreaterThan => f.write_str(">"), Self::GreaterOrEqual => f.write_str(">="), + Self::QuestionMark => f.write_str("?"), } } } @@ -801,11 +847,11 @@ mod tests { [ Token::Constant(10), Token::Operator(Operator::Plus), - Token::OpenParenthesis, + Token::Punct(Punct::OpenParenthesis), Token::Constant(8), Token::Operator(Operator::Minus), Token::Constant(4), - Token::CloseParenthesis, + Token::Punct(Punct::CloseParenthesis), Token::Operator(Operator::Asterisk), Token::Constant(3), ] @@ -849,16 +895,16 @@ mod tests { [ Token::Constant(10), Token::Operator(Operator::Plus), - Token::OpenParenthesis, + Token::Punct(Punct::OpenParenthesis), Token::Constant(8), Token::Operator(Operator::Minus), Token::Constant(4), - Token::CloseParenthesis, + Token::Punct(Punct::CloseParenthesis), Token::Operator(Operator::Asterisk), - Token::OpenParenthesis, + Token::Punct(Punct::OpenParenthesis), Token::Operator(Operator::Minus), Token::Constant(3), - Token::CloseParenthesis, + Token::Punct(Punct::CloseParenthesis), ] ); diff --git a/src/tacky.rs b/src/tacky.rs index 3089a6d..fe196ea 100644 --- a/src/tacky.rs +++ b/src/tacky.rs @@ -203,7 +203,9 @@ impl Instruction { BinaryOperator::LessOrEqual => relational_op!(crate::assembly::CondCode::LE), BinaryOperator::GreaterOrEqual => relational_op!(crate::assembly::CondCode::GE), - BinaryOperator::Assignment => todo!(), + BinaryOperator::Assignment | BinaryOperator::QuestionMark => { + panic!("invalid binary operator for assembly"); + } } } diff --git a/src/validate.rs b/src/validate.rs index 156ae6d..c4d34cb 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -66,6 +66,7 @@ fn resolve_statement( match statement { p::Statement::Return(expr) => resolve_expression(expr, variable_map, output), p::Statement::Expression(expr) => resolve_expression(expr, variable_map, output), + p::Statement::If { cond, then, else_ } => todo!(), p::Statement::Null => Ok(()), // nothing to do } } @@ -107,5 +108,11 @@ fn resolve_expression( } p::Expression::Constant(_) => Ok(()), // nothing to do + + p::Expression::Conditional { + cond, + if_true, + if_false, + } => todo!(), } } From 6da7c846c46aab416daf2dfb0a5bab17d68f6ebf Mon Sep 17 00:00:00 2001 From: Herman Skogseth Date: Wed, 12 Mar 2025 20:19:58 +0100 Subject: [PATCH 3/4] Validation --- src/validate.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/validate.rs b/src/validate.rs index c4d34cb..b9b01df 100644 --- a/src/validate.rs +++ b/src/validate.rs @@ -66,7 +66,16 @@ fn resolve_statement( match statement { p::Statement::Return(expr) => resolve_expression(expr, variable_map, output), p::Statement::Expression(expr) => resolve_expression(expr, variable_map, output), - p::Statement::If { cond, then, else_ } => todo!(), + p::Statement::If { cond, then, else_ } => { + resolve_expression(cond, variable_map, output)?; + resolve_statement(then, variable_map, output)?; + + if let Some(expr) = else_ { + resolve_statement(expr, variable_map, output)?; + } + + Ok(()) + } p::Statement::Null => Ok(()), // nothing to do } } @@ -113,6 +122,11 @@ fn resolve_expression( cond, if_true, if_false, - } => todo!(), + } => { + resolve_expression(cond, variable_map, output)?; + resolve_expression(if_true, variable_map, output)?; + resolve_expression(if_false, variable_map, output)?; + Ok(()) + } } } From 2f6db47aa79955a1234e67866883763b5c90720e Mon Sep 17 00:00:00 2001 From: Herman Skogseth Date: Wed, 12 Mar 2025 20:40:41 +0100 Subject: [PATCH 4/4] Tacky --- src/parser.rs | 57 +++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 2 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 80a16cc..38e6af5 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -330,7 +330,29 @@ impl Statement { let _ = expr.emit_tacky(instructions); } Self::Null => {} // nothing to do - Self::If { cond, then, else_ } => todo!(), + Self::If { cond, then, else_ } => { + let end_label = Identifier::new_label("end"); + let else_label = Identifier::new_label("else"); + + let c = cond.emit_tacky(instructions); + instructions.push(crate::tacky::Instruction::JumpIfZero { + condition: c, + target: else_label.clone(), + }); + + let _ = then.emit_tacky(instructions); + instructions.push(crate::tacky::Instruction::Jump { + target: end_label.clone(), + }); + + instructions.push(crate::tacky::Instruction::Label(else_label)); + + if let Some(expr) = else_ { + expr.emit_tacky(instructions); + } + + instructions.push(crate::tacky::Instruction::Label(end_label)); + } } } } @@ -575,7 +597,38 @@ impl Expression { cond, if_true, if_false, - } => todo!(), + } => { + let return_var = crate::tacky::Variable(Identifier::new_temp()); + let end_label = Identifier::new_label("end"); + let expr2_label = Identifier::new_label("expr2"); + + let c = cond.emit_tacky(instructions); + instructions.push(crate::tacky::Instruction::JumpIfZero { + condition: c, + target: expr2_label.clone(), + }); + + let v1 = if_true.emit_tacky(instructions); + instructions.push(crate::tacky::Instruction::Copy { + src: v1, + dst: return_var.clone(), + }); + instructions.push(crate::tacky::Instruction::Jump { + target: end_label.clone(), + }); + + instructions.push(crate::tacky::Instruction::Label(expr2_label)); + + let v2 = if_false.emit_tacky(instructions); + instructions.push(crate::tacky::Instruction::Copy { + src: v2, + dst: return_var.clone(), + }); + + instructions.push(crate::tacky::Instruction::Label(end_label)); + + crate::tacky::Value::Variable(return_var) + } } } }