diff --git a/.clang-format b/.clang-format index 8e22be5..382a3e9 100644 --- a/.clang-format +++ b/.clang-format @@ -3,6 +3,7 @@ AlignAfterOpenBracket: AlwaysBreak AllowShortBlocksOnASingleLine: "Always" AlignConsecutiveShortCaseStatements: Enabled: true +AllowShortCaseLabelsOnASingleLine: 'true' AllowShortEnumsOnASingleLine: 'false' AllowShortIfStatementsOnASingleLine: WithoutElse AllowShortLambdasOnASingleLine: 'true' diff --git a/src/ast.h b/src/ast.h new file mode 100644 index 0000000..41aec95 --- /dev/null +++ b/src/ast.h @@ -0,0 +1,198 @@ +#ifndef WINTER_AST_H +#define WINTER_AST_H + +#include +#include +#include +#include + +#include "lexer.h" + +namespace Winter { + + // pre-declarations + struct Expr; + struct aliasNode; + struct argNode; + struct assignNode; + struct binaryExpr; + struct blockNode; + struct classNode; + struct enumNode; + struct forEachStmt; + struct forStmt; + struct funcCallStmt; + struct funcNode; + struct ifStmt; + struct importNode; + struct interfaceNode; + struct letStmt; + struct methodCallStmt; + struct moduleNode; + struct retStmt; + struct tertiaryExpr; + struct typeDeclarationNode; + struct typeNode; + struct unaryExpr; + struct varDeclarationNode; + + // variants + using Stmt_t = std::variant; + using Expr_t = std::variant; + using ExprStmt_t = std::variant; + using Definitions_t = std::variant; + using Declarations_t = std::variant; + using BlockItem_t = std::variant; + + // definitions + + // ALIAS IDENT EQUAL IDENT SEMICOLON + // ALIAS IDENT EQUAL FUNC... 
+  struct aliasNode {  // alias declaration: both sides are stored as plain strings
+    std::string name;
+    std::string type;
+  };
+
+  // IDENT COLON IDENT
+  // name type
+  struct argNode {  // one `name: type` pair; both default to the empty string
+    std::string name = "";
+    std::string type = "";
+  };
+
+  // IDENT EQUAL expr
+  struct assignNode {  // target identifier plus the expression assigned to it
+    std::string lhs = "";
+    Expr_t rhs;
+  };
+
+  struct binaryExpr {  // two operand expressions joined by an operator token type
+    Expr_t lhs;
+    TokenType symbol;
+    Expr_t rhs;
+  };
+
+  struct blockNode {  // ordered list of block items; empty by default
+    std::vector items = {};
+  };
+
+  struct classNode {  // optional interface name plus attribute and method lists
+    std::optional interfaceName;
+    std::vector attributes = {};
+    std::vector methods = {};
+  };
+
+  // ENUM LBRACE (IDENT COMMA) RBRACE
+  struct enumNode {  // namespace string plus the list of enumerator identifiers
+    std::string enumNamespace;
+    std::vector idents = {};
+  };
+
+  struct forStmt {  // for loop: start / stop / step parts and a body block
+    letStmt start;  // NOTE(review): letStmt is only forward-declared here (defined further down); a by-value member needs the complete type - confirm ordering
+    assignNode stop;
+    unaryExpr step;  // NOTE(review): unaryExpr is likewise defined later in this header - confirm ordering
+    blockNode body;
+  };
+
+  // FOR LPAREN IDENT COLON IDENT RPAREN BLOCK
+  struct forEachStmt {  // NOTE(review): grammar above ends in BLOCK but no body member is stored - confirm intent
+    std::string element;
+    std::string container;
+  };
+
+  // IDENT LPAREN (IDENT|EXPR) SEMICOLON
+  struct funcCallStmt {  // callee identifier and its parameter list
+    std::string ident;
+    std::vector params;
+  };
+
+  // FUNC LPAREN (argNode...) RPAREN IDENT LBRACE blockNode RBRACE
+  struct funcNode {  // argument list, return type name, and body block
+    std::vector arguments = {};
+    std::string returnType;
+    blockNode block;
+  };
+
+  // IF LPAREN Expr_t RPAREN blockNode ELSE [block|IF]
+  struct ifStmt {  // condition, mandatory if-block, optional else part
+    Expr_t expr;
+    blockNode ifBlock;
+    std::optional elseBlock;
+  };
+
+  // IMPORT IDENT
+  struct importNode {  // name of the imported module
+    std::string modName;
+  };
+
+  struct interfaceNode {  // list of declarations; empty by default
+    std::vector declarations = {};
+  };
+
+  // LET IDENT (COLON IDENT) EQUAL Expr_t SEMICOLON
+  // LET IDENT (LSQUACKET (IDENT COMMA) RSQUACKET) EQUAL [FUNC|IMPORT|CLASS|INTERFACE]
+  struct letStmt {  // binding name, generics list, optional type literal, and a variant body
+    std::string name = "";
+    std::vector generics = {};
+    std::optional typeLiteral;
+    std::variant body;
+  };
+
+  struct methodCallStmt {  // object name plus the call made on it (reuses funcCallStmt)
+    std::string objectName;
+    funcCallStmt methodCall;
+  };
+
+  // MOD, IDENT, SEMICOLON, [let|type {} ]
+  struct moduleNode {  // module name and its list of contents
+    std::string moduleName = "";
+    std::vector contents = {};
+  };
+
+  struct retStmt {  // return statement carrying one expression
+    Expr_t expr;
+  };
+
+  // NOTE: Only tertiary expr is ?:
+  struct tertiaryExpr {  // three operand expressions: left, middle, right
+    Expr_t left;
+    Expr_t middle;
+    Expr_t right;
+  };
+
+  // TYPE IDENT EQUAL [enum|class|interface]
+  struct typeNode {  // same four members as letStmt: name, generics, optional type literal, variant body
+    std::string name = "";
+    std::vector generics = {};
+    std::optional typeLiteral;
+    std::variant body;
+  };
+
+  // TYPE IDENT EQUAL ...
+  struct typeDeclarationNode {  // declaration record: name, kind token, and kind-specific optional fields
+    std::string name;
+    TokenType type; // func, enum, class
+
+    // func (presumably only set when `type` denotes a function - confirm against the parser)
+    std::optional> args;
+    std::optional returnType;
+
+    // class implements interface
+    std::optional interfaceName;
+  };
+
+  struct unaryExpr {  // operator token type applied to a single operand expression
+    TokenType symbol;
+    Expr_t operand;
+  };
+
+  // LET IDENT COLON IDENT SEMI
+  struct varDeclarationNode {  // variable name with an optional type
+    std::string name;
+    std::optional type;
+  };
+
+} // namespace Winter
+
+#endif // WINTER_AST_H
diff --git a/src/lexer.cpp b/src/lexer.cpp
index ad9b6ba..593afca 100644
--- a/src/lexer.cpp
+++ b/src/lexer.cpp
@@ -3,6 +3,7 @@
 #include
 #include
 #include
+#include
 
 namespace Winter {
   auto Lexer::skipWhitespace() -> void {
@@ -11,6 +12,17 @@ namespace Winter {
     while (std::any_of(whitespace.begin(), whitespace.end(), cmp)) { playhead++; }
   }
 
+  auto Lexer::skipComment() -> void {  // move playhead to the first '\n' at or after it, or to src.size() if there is none
+    if (playhead >= src.size()) {  // already at or past the end: clamp and stop
+      playhead = src.size();
+      return;
+    }
+
+    auto it = std::ranges::find_if(  // linear scan for the newline that ends the comment
+        src.begin() + playhead, src.end(), [](char c) { return c == '\n'; });
+    playhead = (it != src.end()) ? std::distance(src.begin(), it) : src.size();  // stops ON the newline; it is not consumed here
+  }
+
   // TODO: utility?
[[nodiscard]] auto between(int min, int max, int val) -> bool {
     return (min <= val && val <= max);
@@ -118,51 +130,34 @@ namespace Winter {
     src = source;
     skipWhitespace();
 
+    while (playhead < src.size() && src.at(playhead) == '#') {  // skip every consecutive comment line, not only the first
+      skipComment();  // leaves playhead on the terminating '\n' (or at the end of src)
+      skipWhitespace();  // step over the newline and any indentation before the next token or comment
+    }
+
     switch (src.at(playhead)) {
-      case '(':
-        return lexSingle(TokenType::LPAREN);
-      case ')':
-        return lexSingle(TokenType::RPAREN);
-      case '{':
-        return lexSingle(TokenType::LBRACE);
-      case '}':
-        return lexSingle(TokenType::RBRACE);
-      case '[':
-        return lexSingle(TokenType::LSQUACKET);
-      case ']':
-        return lexSingle(TokenType::RSQUACKET);
-      case ':':
-        return lexSingle(TokenType::COLON);
-      case ';':
-        return lexSingle(TokenType::SEMICOLON);
-      case '+':
-        return lexSingle(TokenType::PLUS);
-      case '-':
-        return lexSingle(TokenType::MINUS);
-      case '*':
-        return lexSingle(TokenType::STAR);
-      case '/':
-        return lexSingle(TokenType::SLASH);
-      case ',':
-        return lexSingle(TokenType::COMMA);
-      case '.':
-        return lexDouble('.', TokenType::DOT, TokenType::DOT_DOT);
-      case '>':
-        return lexDouble('=', TokenType::GREATER, TokenType::GREATER_EQ);
-      case '<':
-        return lexDouble('=', TokenType::LESS, TokenType::LESS_EQ);
-      case '!':
-        return lexDouble('=', TokenType::NOT, TokenType::NOT_EQ);
-      case '&':
-        return lexDouble('&', TokenType::ERROR, TokenType::AND);
-      case '|':
-        return lexDouble('|', TokenType::ERROR, TokenType::OR);
-      case '\'':
-        return lexChar();
-      case '"':
-        return lexString();
-      default:
-        break;
+      case '(': return lexSingle(TokenType::LPAREN);
+      case ')': return lexSingle(TokenType::RPAREN);
+      case '{': return lexSingle(TokenType::LBRACE);
+      case '}': return lexSingle(TokenType::RBRACE);
+      case '[': return lexSingle(TokenType::LSQUACKET);
+      case ']': return lexSingle(TokenType::RSQUACKET);
+      case ':': return lexSingle(TokenType::COLON);
+      case ';': return lexSingle(TokenType::SEMICOLON);
+      case '+': return lexDouble('+', TokenType::PLUS, TokenType::PLUS_PLUS);
+      case '-': return lexDouble('-', TokenType::MINUS, TokenType::MINUS_MINUS);
+      case '*': return lexSingle(TokenType::STAR);
+      case '/': return lexSingle(TokenType::SLASH);
+      case ',': return lexSingle(TokenType::COMMA);
+      case '.': return lexDouble('.', TokenType::DOT, TokenType::DOT_DOT);
+      case '>': return lexDouble('=', TokenType::GREATER, TokenType::GREATER_EQ);
+      case '<': return lexDouble('=', TokenType::LESS, TokenType::LESS_EQ);
+      case '!': return lexDouble('=', TokenType::NOT, TokenType::NOT_EQ);
+      case '&': return lexDouble('&', TokenType::ERROR, TokenType::AND);
+      case '|': return lexDouble('|', TokenType::ERROR, TokenType::OR);
+      case '\'': return lexChar();
+      case '"': return lexString();
+      default: break;
     }
 
     if (isNumeric()) { return lexNumeric(); }
diff --git a/src/lexer.h b/src/lexer.h
index 12ff349..d8af12c 100644
--- a/src/lexer.h
+++ b/src/lexer.h
@@ -24,7 +24,9 @@ namespace Winter {
     COLON,
     SEMICOLON,
     PLUS,
+    PLUS_PLUS,
     MINUS,
+    MINUS_MINUS,
     STAR,
     SLASH,
     COMMA,
@@ -47,8 +49,10 @@ namespace Winter {
 
     // KEYWORDS
     ALIAS,
+    BREAK,
     CASE,
     CLASS,
+    CONTINUE,
     DEFAULT,
     ENUM,
     FALSE,
@@ -91,6 +95,8 @@ namespace Winter {
 
   const std::unordered_map keywords = {
     {"alias"sv, TokenType::ALIAS},
+    {"break"sv, TokenType::BREAK},
     {"case"sv, TokenType::CASE},
     {"class"sv, TokenType::CLASS},
+    {"continue"sv, TokenType::CONTINUE},
     {"default"sv, TokenType::DEFAULT},
@@ -111,6 +117,7 @@ namespace Winter {
     explicit Lexer() {}
 
     auto skipWhitespace() -> void;
+    auto skipComment() -> void;
     [[nodiscard]] auto isNumeric() -> bool;
     [[nodiscard]] auto isLetter() -> bool;
     [[nodiscard]] auto lexNumeric() -> token_result_t;
diff --git a/tests/lexer_test.h b/tests/lexer_test.h
index 69645fc..a2a2590 100644
--- a/tests/lexer_test.h
+++ b/tests/lexer_test.h
@@ -20,6 +20,25 @@ constexpr auto test_skipWhitespace([[maybe_unused]] Willow::Test* test) -> int {
   return 0;
 }
 
+constexpr auto test_skipComment([[maybe_unused]] Willow::Test* test) -> int {  // returns 0 on success, 1 or 2 to identify the failing check
+  auto L = Winter::Lexer();
+  L.src = "# this is a comment\n  0"sv;  // '\n' sits at index 19, followed by two spaces, so '0' is at index 22
+  L.skipComment();
+
+  if (L.playhead != 19) {  // skipComment must stop on the newline without consuming it
+    test->alert("Playhead = " + std::to_string(L.playhead));
+    return 1;
+  }
+
+  L.skipWhitespace();
+  if (L.playhead != 22) {  // the newline and both spaces are skipped, landing on '0'
+    test->alert("Playhead = " + std::to_string(L.playhead));
+    return 2;
+  }
+
+  return 0;
+}
+
 constexpr auto test_between([[maybe_unused]] Willow::Test* test) -> int {
   if (!Winter::between(1, 10, 5)) { return 1; }
   if (Winter::between(1, 10, 20)) { return 2; }
diff --git a/tests/test.cpp b/tests/test.cpp
index bbbcd07..5daa790 100644
--- a/tests/test.cpp
+++ b/tests/test.cpp
@@ -9,6 +9,7 @@ auto main(int argc, char* argv[]) -> int {
   Willow::registerTests({
       // lexer_test.h
       {"skipWhitespace", test_skipWhitespace},
+      {"skipComment", test_skipComment},
       {"between", test_between},
       {"isNumeric", test_isNumeric},
       {"isLetter", test_isLetter},