From c3b4c29094168c1db095455c8bed56ff91c754ad Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Wed, 11 Feb 2026 16:45:47 +0000 Subject: [PATCH 1/8] AST nodes defined --- src/ast.h | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 src/ast.h diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..4d23d45 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,196 @@ +#ifndef WINTER_AST_H +#define WINTER_AST_H + +#include +#include +#include +#include + +#include "lexer.h" + +namespace Winter { + + // pre-declarations + struct Expr; + struct aliasNode; + struct argNode; + struct assignNode; + struct binaryExpr; + struct blockNode; + struct classNode; + struct enumNode; + struct forEachStmt; + struct forStmt; + struct funcCallStmt; + struct funcNode; + struct ifStmt; + struct importNode; + struct interfaceNode; + struct letStmt; + struct methodCallStmt; + struct moduleNode; + struct retStmt; + struct tertiaryExpr; + struct typeDeclarationNode; + struct typeNode; + struct unaryExpr; + struct varDeclarationNode; + + // variants + using Stmt_t = std::variant; + using Expr_t = std::variant; + using ExprStmt_t = std::variant; + using Definitions_t = std::variant; + using Declarations_t = std::variant; + + // definitions + + // ALIAS IDENT EQUAL IDENT SEMICOLON + // ALIAS IDENT EQUAL FUNC... 
+ struct aliasNode { + std::string name; + std::string type; + }; + + // IDENT COLON IDENT + // name type + struct argNode { + std::string name = ""; + std::string type = ""; + }; + + // IDENT EQUAL expr + struct assignNode { + std::string lhs = ""; + Expr_t rhs; + }; + + struct BinaryExpr { + Expr_t lhs; + TokenType symbol; + Expr_t rhs; + }; + + // TODO: could be a block of any other node + struct blockNode {}; + + struct classNode { + std::optional interfaceName; + std::vector attributes = {}; + std::vector methods = {}; + }; + + // ENUM LBRACE (IDENT COMMA) RBRACE + struct enumNode { + std::string enumNamespace; + std::vector idents = {}; + }; + + struct forStmt { + letStmt start; + assignNode stop; + unaryExpr step; + blockNode body; + }; + + // FOR LPAREN IDENT COLON IDENT RPAREN BLOCK + struct forEachStmt { + std::string element; + std::string container; + }; + + // IDENT LPAREN (IDENT|EXPR) RPAREN SEMICOLON + struct funcCallStmt { + std::string ident; + std::vector params; + }; + + // FUNC LPAREN (argNode...) 
RPAREN IDENT LBRACE blockNode RBRACE + struct funcNode { + std::vector arguments = {}; + std::string returnType; + blockNode block; + }; + + // IF LPAREN Expr_t RPAREN blockNode ELSE [block|IF] + struct ifStmt { + Expr_t expr; + blockNode ifBlock; + std::optional elseBlock; + }; + + // IMPORT IDENT + struct importNode { + std::string modName; + }; + + struct interfaceNode { + std::vector declarations = {}; + }; + + // LET IDENT (COLON IDENT) EQUAL Expr_t SEMICOLON + // LET IDENT (LSQUACKET (IDENT COMMA) RSQUACKET) EQUAL [FUNC|IMPORT|CLASS|INTERFACE] + struct letStmt { + std::string name = ""; + std::vector generics = {}; + std::optional typeLiteral; + std::variant body; + }; + + struct methodCallStmt { + std::string objectName; + funcCallStmt methodCall; + }; + + // MOD, IDENT, SEMICOLON, [let|type {} ] + struct moduleNode { + std::string moduleName = ""; + std::vector contents = {}; + }; + + struct retStmt { + Expr_t expr; + }; + + // NOTE: the only ternary (three-operand) expression is the conditional ?: + struct tertiaryExpr { + Expr_t left; + Expr_t middle; + Expr_t right; + }; + + // TYPE IDENT EQUAL [enum|class|interface] + struct typeNode { + std::string name = ""; + std::vector generics = {}; + std::optional typeLiteral; + std::variant body; + }; + + // TYPE IDENT EQUAL ... 
+ struct typeDeclarationNode { + std::string name; + TokenType type; // func, enum, class + + // func + std::optional> args; + std::optional returnType; + + // class implements interface + std::optional interfaceName; + }; + + struct unaryExpr { + TokenType symbol; + Expr_t operand; + }; + + // LET IDENT COLON IDENT SEMI + struct varDeclarationNode { + std::string name; + std::optional type; + }; + +} // namespace Winter + +#endif // WINTER_AST_H From 8e0ed67671857292b269caae2be420d6d4ba15fc Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Wed, 11 Feb 2026 16:50:20 +0000 Subject: [PATCH 2/8] add new keywords and handling --- src/lexer.cpp | 4 ++-- src/lexer.h | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/lexer.cpp b/src/lexer.cpp index ad9b6ba..6c9d815 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -136,9 +136,9 @@ namespace Winter { case ';': return lexSingle(TokenType::SEMICOLON); case '+': - return lexSingle(TokenType::PLUS); + return lexDouble('+', TokenType::PLUS, TokenType::PLUS_PLUS); case '-': - return lexSingle(TokenType::MINUS); + return lexDouble('-', TokenType::MINUS, TokenType::MINUS_MINUS); case '*': return lexSingle(TokenType::STAR); case '/': diff --git a/src/lexer.h b/src/lexer.h index 12ff349..bff02e4 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -24,7 +24,9 @@ namespace Winter { COLON, SEMICOLON, PLUS, + PLUS_PLUS, MINUS, + MINUS_MINUS, STAR, SLASH, COMMA, @@ -47,8 +49,10 @@ namespace Winter { // KEYWORDS ALIAS, + BREAK, CASE, CLASS, + CONTINUE, DEFAULT, ENUM, FALSE, @@ -91,6 +95,8 @@ namespace Winter { const std::unordered_map keywords = { {"alias"sv, TokenType::ALIAS}, + {"break"sv, TokenType::BREAK}, + {"continue"sv, TokenType::CONTINUE}, {"case"sv, TokenType::CASE}, {"class"sv, TokenType::CLASS}, {"default"sv, TokenType::DEFAULT}, From 7ebe793dfe94ebb07e7b94cc7c4f8d5172704ed9 Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Wed, 11 Feb 2026 16:51:04 +0000 Subject: [PATCH 3/8] symbol switch on a single line --- 
.clang-format | 1 + src/lexer.cpp | 66 +++++++++++++++++---------------------------------- 2 files changed, 23 insertions(+), 44 deletions(-) diff --git a/.clang-format b/.clang-format index 8e22be5..382a3e9 100644 --- a/.clang-format +++ b/.clang-format @@ -3,6 +3,7 @@ AlignAfterOpenBracket: AlwaysBreak AllowShortBlocksOnASingleLine: "Always" AlignConsecutiveShortCaseStatements: Enabled: true +AllowShortCaseLabelsOnASingleLine: 'true' AllowShortEnumsOnASingleLine: 'false' AllowShortIfStatementsOnASingleLine: WithoutElse AllowShortLambdasOnASingleLine: 'true' diff --git a/src/lexer.cpp b/src/lexer.cpp index 6c9d815..5a4f8de 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -119,50 +119,28 @@ namespace Winter { skipWhitespace(); switch (src.at(playhead)) { - case '(': - return lexSingle(TokenType::LPAREN); - case ')': - return lexSingle(TokenType::RPAREN); - case '{': - return lexSingle(TokenType::LBRACE); - case '}': - return lexSingle(TokenType::RBRACE); - case '[': - return lexSingle(TokenType::LSQUACKET); - case ']': - return lexSingle(TokenType::RSQUACKET); - case ':': - return lexSingle(TokenType::COLON); - case ';': - return lexSingle(TokenType::SEMICOLON); - case '+': - return lexDouble('+', TokenType::PLUS, TokenType::PLUS_PLUS); - case '-': - return lexDouble('-', TokenType::MINUS, TokenType::MINUS_MINUS); - case '*': - return lexSingle(TokenType::STAR); - case '/': - return lexSingle(TokenType::SLASH); - case ',': - return lexSingle(TokenType::COMMA); - case '.': - return lexDouble('.', TokenType::DOT, TokenType::DOT_DOT); - case '>': - return lexDouble('=', TokenType::GREATER, TokenType::GREATER_EQ); - case '<': - return lexDouble('=', TokenType::LESS, TokenType::LESS_EQ); - case '!': - return lexDouble('=', TokenType::NOT, TokenType::NOT_EQ); - case '&': - return lexDouble('&', TokenType::ERROR, TokenType::AND); - case '|': - return lexDouble('|', TokenType::ERROR, TokenType::OR); - case '\'': - return lexChar(); - case '"': - return lexString(); - 
default: - break; + case '(': return lexSingle(TokenType::LPAREN); + case ')': return lexSingle(TokenType::RPAREN); + case '{': return lexSingle(TokenType::LBRACE); + case '}': return lexSingle(TokenType::RBRACE); + case '[': return lexSingle(TokenType::LSQUACKET); + case ']': return lexSingle(TokenType::RSQUACKET); + case ':': return lexSingle(TokenType::COLON); + case ';': return lexSingle(TokenType::SEMICOLON); + case '+': return lexDouble('+', TokenType::PLUS, TokenType::PLUS_PLUS); + case '-': return lexDouble('-', TokenType::MINUS, TokenType::MINUS_MINUS); + case '*': return lexSingle(TokenType::STAR); + case '/': return lexSingle(TokenType::SLASH); + case ',': return lexSingle(TokenType::COMMA); + case '.': return lexDouble('.', TokenType::DOT, TokenType::DOT_DOT); + case '>': return lexDouble('=', TokenType::GREATER, TokenType::GREATER_EQ); + case '<': return lexDouble('=', TokenType::LESS, TokenType::LESS_EQ); + case '!': return lexDouble('=', TokenType::NOT, TokenType::NOT_EQ); + case '&': return lexDouble('&', TokenType::ERROR, TokenType::AND); + case '|': return lexDouble('|', TokenType::ERROR, TokenType::OR); + case '\'': return lexChar(); + case '"': return lexString(); + default: break; } if (isNumeric()) { return lexNumeric(); } From d4e9de6c5d757d4ff675d27b7db9b5594cfd084a Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Thu, 12 Feb 2026 07:55:06 +0000 Subject: [PATCH 4/8] logic for skipping comments --- src/lexer.cpp | 17 +++++++++++++++++ src/lexer.h | 1 + tests/lexer_test.h | 19 +++++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/src/lexer.cpp b/src/lexer.cpp index 5a4f8de..593afca 100644 --- a/src/lexer.cpp +++ b/src/lexer.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace Winter { auto Lexer::skipWhitespace() -> void { @@ -11,6 +12,17 @@ namespace Winter { while (std::any_of(whitespace.begin(), whitespace.end(), cmp)) { playhead++; } } + auto Lexer::skipComment() -> void { + if (playhead >= src.size()) { + playhead = 
src.size(); + return; + } + + auto it = std::ranges::find_if( + src.begin() + playhead, src.end(), [](char c) { return c == '\n'; }); + playhead = (it != src.end()) ? std::distance(src.begin(), it) : src.size(); + } + // TODO: utility? [[nodiscard]] auto between(int min, int max, int val) -> bool { return (min <= val && val <= max); @@ -118,6 +130,11 @@ namespace Winter { src = source; skipWhitespace(); + if (src.at(playhead) == '#') { + skipComment(); + skipWhitespace(); + } + switch (src.at(playhead)) { case '(': return lexSingle(TokenType::LPAREN); case ')': return lexSingle(TokenType::RPAREN); diff --git a/src/lexer.h b/src/lexer.h index bff02e4..d8af12c 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -117,6 +117,7 @@ namespace Winter { explicit Lexer() {} auto skipWhitespace() -> void; + auto skipComment() -> void; [[nodiscard]] auto isNumeric() -> bool; [[nodiscard]] auto isLetter() -> bool; [[nodiscard]] auto lexNumeric() -> token_result_t; diff --git a/tests/lexer_test.h b/tests/lexer_test.h index 69645fc..a29b5a8 100644 --- a/tests/lexer_test.h +++ b/tests/lexer_test.h @@ -20,6 +20,25 @@ constexpr auto test_skipWhitespace([[maybe_unused]] Willow::Test* test) -> int { return 0; } +constexpr auto test_skipComment([[maybe_unused]] Willow::Test* test) -> int { + auto L = Winter::Lexer(); + L.src = "# this is a comment\n 0"sv; + L.skipComment(); + + if (L.playhead != 20) { + test->alert("Playhead = " + std::to_string(L.playhead)); + return 1; + } + + L.skipWhitespace(); + if (L.playhead != 22) { + test->alert("Playhead = " + std::to_string(L.playhead)); + return 1; + } + + return 0; +} + constexpr auto test_between([[maybe_unused]] Willow::Test* test) -> int { if (!Winter::between(1, 10, 5)) { return 1; } if (Winter::between(1, 10, 20)) { return 2; } From c1108d4413cf337b8441a43c0fd76741395d1487 Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Thu, 12 Feb 2026 07:56:56 +0000 Subject: [PATCH 5/8] add test to runner --- tests/test.cpp | 1 + 1 file changed, 1 
insertion(+) diff --git a/tests/test.cpp b/tests/test.cpp index bbbcd07..5daa790 100644 --- a/tests/test.cpp +++ b/tests/test.cpp @@ -9,6 +9,7 @@ auto main(int argc, char* argv[]) -> int { Willow::registerTests({ // lexer_test.h {"skipWhitespace", test_skipWhitespace}, + {"skipComment", test_skipComment}, {"between", test_between}, {"isNumeric", test_isNumeric}, {"isLetter", test_isLetter}, From 076222f34949472c5641285c1bb0ae48ec4d8fe3 Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Thu, 12 Feb 2026 07:58:55 +0000 Subject: [PATCH 6/8] amend test --- tests/lexer_test.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/lexer_test.h b/tests/lexer_test.h index a29b5a8..a2a2590 100644 --- a/tests/lexer_test.h +++ b/tests/lexer_test.h @@ -25,7 +25,7 @@ constexpr auto test_skipComment([[maybe_unused]] Willow::Test* test) -> int { L.src = "# this is a comment\n 0"sv; L.skipComment(); - if (L.playhead != 20) { + if (L.playhead != 19) { test->alert("Playhead = " + std::to_string(L.playhead)); return 1; } @@ -33,7 +33,7 @@ constexpr auto test_skipComment([[maybe_unused]] Willow::Test* test) -> int { L.skipWhitespace(); if (L.playhead != 22) { test->alert("Playhead = " + std::to_string(L.playhead)); - return 1; + return 2; } return 0; From af5a6781915b3e32cdd76bf6762c423d73ed3217 Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Thu, 12 Feb 2026 08:02:19 +0000 Subject: [PATCH 7/8] inconsistency --- src/ast.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ast.h b/src/ast.h index 4d23d45..f9e8343 100644 --- a/src/ast.h +++ b/src/ast.h @@ -65,7 +65,7 @@ namespace Winter { Expr_t rhs; }; - struct BinaryExpr { + struct binaryExpr { Expr_t lhs; TokenType symbol; Expr_t rhs; From e7af95064e169f9b013eb433b6821bc15a457977 Mon Sep 17 00:00:00 2001 From: Ttibsi Date: Thu, 12 Feb 2026 08:02:49 +0000 Subject: [PATCH 8/8] handle blocks --- src/ast.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/ast.h b/src/ast.h index 
f9e8343..41aec95 100644 --- a/src/ast.h +++ b/src/ast.h @@ -42,6 +42,7 @@ namespace Winter { using ExprStmt_t = std::variant; using Definitions_t = std::variant; using Declarations_t = std::variant; + using BlockItem_t = std::variant; // definitions @@ -71,8 +72,9 @@ namespace Winter { Expr_t rhs; }; - // TODO: could be a block of any other node - struct blockNode {}; + struct blockNode { + std::vector items = {}; + }; struct classNode { std::optional interfaceName;