diff --git a/.clang-format b/.clang-format index 9560a2e..8e22be5 100644 --- a/.clang-format +++ b/.clang-format @@ -1,8 +1,14 @@ --- AlignAfterOpenBracket: AlwaysBreak AllowShortBlocksOnASingleLine: "Always" +AlignConsecutiveShortCaseStatements: + Enabled: true +AllowShortEnumsOnASingleLine: 'false' AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: 'true' +AllowShortLoopsOnASingleLine: 'true' BasedOnStyle: Chromium +BinPackParameters: 'OnePerLine' ColumnLimit: '100' ConstructorInitializerAllOnOneLineOrOnePerLine: 'true' Cpp11BracedListStyle: 'true' diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 91ec566..f0dfac9 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -9,10 +9,16 @@ jobs: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 - uses: pre-commit/action@v3.0.0 - # unit_tests: - # runs-on: ubuntu-24.04 - # steps: - # - uses: actions/checkout@v4 - # - run: sudo apt-get install clang-19 - # - run: cmake -DCMAKE_CXX_COMPILER=clang++-19 -DRUN_TESTS=true -B build && cmake --build build - # - run: ./build/tests/test_exe + unit_tests: + runs-on: ubuntu-24.04 + steps: + - uses: actions/checkout@v4 + - run: | + curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba + eval "$(bin/micromamba shell hook --shell bash)" + micromamba create -n main -c conda-forge gxx ninja libstdcxx meson + - run: bin/micromamba run -n main meson setup build + env: + CXX: /home/runner/.local/share/mamba/envs/main/bin/g++ + - run: bin/micromamba run -n main meson compile -C build + - run: ./build/test_exe diff --git a/.gitignore b/.gitignore index efd7114..e3c9c31 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,6 @@ iris.log build/ .cache/ .gdb_history + +/subprojects/* +!/subprojects/*.wrap diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 22a0cb4..2cd3c53 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,5 +1,5 @@ --- 
-exclude: ^include|^build
+exclude: ^build|^subprojects
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v6.0.0
@@ -13,10 +13,11 @@ repos:
     rev: v21.1.8
     hooks:
       - id: clang-format
-  - repo: https://github.com/cmake-lint/cmake-lint
-    rev: 1.4.3
+  - repo: https://github.com/trim21/pre-commit-mirror-meson
+    rev: v1.10.1
     hooks:
-      - id: cmakelint
+      - id: meson-fmt
+        args: ['--inplace']
   - repo: https://github.com/rhysd/actionlint
     rev: v1.7.10
     hooks:
diff --git a/meson.build b/meson.build
index ef01eba..edd4310 100644
--- a/meson.build
+++ b/meson.build
@@ -1,8 +1,17 @@
 project('winter', 'cpp', default_options: ['cpp_std=c++23'])
 cpp_flags = ['-Wall', '-Wextra', '-Wconversion', '-Wimplicit-fallthrough']
-src_files = [
-    'src/compiler.cpp'
-]
+src_files = ['src/compiler.cpp', 'src/lexer.cpp']
 
 winter_src = static_library('winter_src', src_files, cpp_args: cpp_flags)
 executable('winter', 'src/main.cpp', cpp_args: cpp_flags, link_with: winter_src)
+
+# tests
+willow = dependency('willow', method: 'cmake')
+executable(
+    'test_exe',
+    'tests/test.cpp',
+    link_with: winter_src,
+    cpp_args: cpp_flags,
+    dependencies: willow,
+    include_directories: 'src',
+)
diff --git a/src/error.h b/src/error.h
new file mode 100644
index 0000000..b6663dc
--- /dev/null
+++ b/src/error.h
@@ -0,0 +1,28 @@
+#ifndef WINTER_ERROR_H
+#define WINTER_ERROR_H
+
+#include
+#include
+#include
+
+namespace Winter {
+    // Forward declaration; Token is defined in lexer.h.
+    struct Token;
+
+    enum class ErrType : uint8_t {
+        Lexer,
+        NotImplemented
+    };
+
+    struct Error {
+        ErrType type;
+        std::string msg;
+
+        explicit constexpr Error(ErrType t, std::string m) : type(t), msg(m) {}
+    };
+
+    using token_result_t = std::expected;
+
+}  // namespace Winter
+
+#endif  // WINTER_ERROR_H
diff --git a/src/lexer.cpp b/src/lexer.cpp
new file mode 100644
index 0000000..ad9b6ba
--- /dev/null
+++ b/src/lexer.cpp
@@ -0,0 +1,196 @@
+#include "lexer.h"
+
+#include
+#include
+#include
+
+namespace Winter {
+    // Advances `playhead` past any run of whitespace, stopping at the end of `src`.
+    auto Lexer::skipWhitespace() -> void {
+        // Check bounds before every read: src.at() throws std::out_of_range when the input is
+        // empty or ends in whitespace.
+        while (playhead < src.size()) {
+            const char c = src.at(playhead);
+            if (c != ' ' && c != '\n' && c != '\t' && c != '\r') { break; }
+            playhead++;
+        }
+    }
+
+    // TODO: utility?
+    // Returns true when `val` lies in the closed interval [min, max].
+    [[nodiscard]] auto between(int min, int max, int val) -> bool {
+        return (min <= val && val <= max);
+    }
+
+    // True when the character at `playhead` is a decimal digit or '.'; false at end of input.
+    [[nodiscard]] auto Lexer::isNumeric() -> bool {
+        if (playhead >= src.size()) { return false; }
+        const char c = src.at(playhead);
+        return between('0', '9', c) || c == '.';
+    }
+
+    // True when the character at `playhead` matches [A-Za-z0-9_]; false at end of input.
+    [[nodiscard]] auto Lexer::isLetter() -> bool {
+        if (playhead >= src.size()) { return false; }
+        const char c = src.at(playhead);
+        // Character literals replace the raw ASCII codes (65, 90, ...) for readability.
+        return between('A', 'Z', c) || between('a', 'z', c) || between('0', '9', c) || c == '_';
+    }
+
+    // Lexes a numeric literal at `playhead`: a run of digits containing at most one '.', and
+    // only when a digit follows it. Returns a Lexer error if nothing was consumed.
+    [[nodiscard]] auto Lexer::lexNumeric() -> token_result_t {
+        Token t = Token(TokenType::NUM_LITERAL, playhead);
+        bool seen_dot = false;
+        while (isNumeric()) {
+            if (src.at(playhead) == '.') {
+                // Leave a second or trailing '.' unconsumed so that input such as `1..5` is
+                // not swallowed whole and the `..` can still be lexed as DOT_DOT.
+                const bool digit_follows =
+                    playhead + 1 < src.size() && between('0', '9', src.at(playhead + 1));
+                if (seen_dot || !digit_follows) { break; }
+                seen_dot = true;
+            }
+            t.len++;
+            playhead++;
+        }
+
+        if (t.len == 0) {
+            return std::unexpected(
+                Error(ErrType::Lexer, std::format("Invalid numeric found at {}", playhead)));
+        }
+
+        return t;
+    }
+
+    // Emits a one-character token of `type` at `playhead` and advances past it.
+    [[nodiscard]] auto Lexer::lexSingle(const TokenType type) -> token_result_t {
+        const std::size_t start = playhead++;
+        return Token(type, start, 1);
+    }
+
+    // Lexes a one- or two-character operator. The character at `playhead` is always consumed;
+    // if the next character equals `c1` it is consumed as well and a `pair` token is returned,
+    // otherwise a `single` token is returned.
+    [[nodiscard]] auto Lexer::lexDouble(const char c1, const TokenType single, const TokenType pair)
+        -> token_result_t {
+        const std::size_t start = playhead++;
+        // The operator may be the last character of the input, so check bounds before peeking.
+        if (playhead < src.size() && src.at(playhead) == c1) {
+            playhead++;
+            return Token(pair, start, 2);
+        }
+        return Token(single, start, 1);
+    }
+
+    // Lexes a three-character char literal such as 'x'. The character at `playhead` is assumed
+    // to be the opening quote and is not re-checked; escape sequences are not handled.
+    [[nodiscard]] auto Lexer::lexChar() -> token_result_t {
+        playhead += 2;  // skip the opening quote and the character itself
+        if (playhead >= src.size() || src.at(playhead) != '\'') {
+            return std::unexpected(
+                Error(ErrType::Lexer, std::format("Malformed char at pos {}", playhead)));
+        }
+
+        playhead++;
+        return Token(TokenType::CHAR_LITERAL, playhead - 3, 3);
+    }
+
+    // Lexes a double-quoted string (both quotes are part of the token); no escape handling.
+    [[nodiscard]] auto Lexer::lexString() -> token_result_t {
+        if (playhead >= src.size() || src.at(playhead) != '"') {
+            return std::unexpected(
+                Error(ErrType::Lexer, "Parsing string started at invalid location"));
+        }
+
+        // find() returns npos if the quote is never closed; at() would throw out_of_range.
+        const std::size_t start = playhead;
+        const std::size_t close = src.find('"', start + 1);
+        if (close == std::string_view::npos) {
+            playhead = src.size();  // the unterminated string runs to the end of the input
+            return std::unexpected(Error(ErrType::Lexer, "Unclosed string"));
+        }
+
+        playhead = close + 1;  // step past the closing quote
+        return Token(TokenType::STR_LITERAL, start, playhead - start);
+    }
+
+    // Lexes a maximal run of [A-Za-z0-9_] characters and classifies it: TYPE_LITERAL if the
+    // word is in `types`, otherwise its keyword token if it is in `keywords`, otherwise IDENT.
+    [[nodiscard]] auto Lexer::lexIdentKeyword() -> token_result_t {
+        const std::size_t start = playhead;
+        while (isLetter()) { playhead++; }
+        const std::string_view word = src.substr(start, playhead - start);
+
+        TokenType type = TokenType::IDENT;
+        if (const auto kw = keywords.find(word); kw != keywords.end()) { type = kw->second; }
+        if (types.contains(word)) { type = TokenType::TYPE_LITERAL; }
+
+        return Token(type, start, playhead - start);
+    }
+
+    // Lexes the next token of `source`, skipping leading whitespace from `playhead` onwards.
+    [[nodiscard]] auto Lexer::operator()(std::string_view source) -> token_result_t {
+        src = source;
+        skipWhitespace();
+        if (playhead >= src.size()) {  // nothing left to lex; src.at() below would throw
+            return std::unexpected(
+                Error(ErrType::Lexer, std::format("Unexpected end of input at {}", playhead)));
+        }
+
+        switch (src.at(playhead)) {
+            case '(':
+                return lexSingle(TokenType::LPAREN);
+            case ')':
+                return lexSingle(TokenType::RPAREN);
+            case '{':
+                return lexSingle(TokenType::LBRACE);
+            case '}':
+                return lexSingle(TokenType::RBRACE);
+            case '[':
+                return lexSingle(TokenType::LSQUACKET);
+            case ']':
+                return lexSingle(TokenType::RSQUACKET);
+            case ':':
+                return lexSingle(TokenType::COLON);
+            case ';':
+                return lexSingle(TokenType::SEMICOLON);
+            case '+':
+                return lexSingle(TokenType::PLUS);
+            case '-':
+                return lexSingle(TokenType::MINUS);
+            case '*':
+                return lexSingle(TokenType::STAR);
+            case '/':
+                return lexSingle(TokenType::SLASH);
+            case ',':
+                return lexSingle(TokenType::COMMA);
+            case '.':
+                return lexDouble('.', TokenType::DOT, TokenType::DOT_DOT);
+            case '>':
+                return lexDouble('=', TokenType::GREATER, TokenType::GREATER_EQ);
+            case '<':
+                return lexDouble('=', TokenType::LESS, TokenType::LESS_EQ);
+            case '=':
+                return lexDouble('=', TokenType::EQUAL, TokenType::EQUAL_EQ);
+            case '!':
+                return lexDouble('=', TokenType::NOT, TokenType::NOT_EQ);
+            case '&':
+                return lexDouble('&', TokenType::ERROR, TokenType::AND);
+            case '|':
+                return lexDouble('|', TokenType::ERROR, TokenType::OR);
+            case '\'':
+                return lexChar();
+            case '"':
+                return lexString();
+            default:
+                break;
+        }
+
+        if (isNumeric()) { return lexNumeric(); }
+        if (isLetter()) { return lexIdentKeyword(); }
+
+        return std::unexpected(
+            Error(ErrType::Lexer, std::format("Invalid token found at {}", playhead)));
+    }
+};  // namespace Winter
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..12ff349
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,138 @@
+#ifndef WINTER_LEXER_H
+#define WINTER_LEXER_H
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "error.h"
+
+using namespace std::literals::string_view_literals;
+
+namespace Winter {
+    // Every kind of token the lexer can produce.
+    enum class TokenType : uint8_t {
+        // SYMBOLS
+        LPAREN,
+        RPAREN,
+        LBRACE,
+        RBRACE,
+        LSQUACKET,
+        RSQUACKET,
+        COLON,
+        SEMICOLON,
+        PLUS,
+        MINUS,
+        STAR,
+        SLASH,
+        COMMA,
+        DOT,
+        DOT_DOT,
+
+        // BOOLEAN OPERATORS
+        GREATER,
+        GREATER_EQ,
+        LESS,
+        LESS_EQ,
+        EQUAL,
+        EQUAL_EQ,
+        NOT,
+        NOT_EQ,
+
+        // LOGICAL OPERATORS
+        AND,
+        OR,
+
+        // KEYWORDS
+        ALIAS,
+        CASE,
+        CLASS,
+        DEFAULT,
+        ENUM,
+        FALSE,
+        FOR,
+        FUNC,
+        IF,
+        IMPLEMENTS,
+        INTERFACE,
+        LET,
+        MOD,
+        RETURN,
+        STATIC,
+        SWITCH,
+        TRUE,
+        TYPE,
+
+        CHAR_LITERAL,
+        NUM_LITERAL,
+        STR_LITERAL,
+        TYPE_LITERAL,
+        IDENT,
+        ERROR,
+    };
+
+    // A lexed token, stored as a start offset and a length rather than a copy of the text.
+    struct Token {
+        TokenType type;
+        std::size_t start;
+        std::size_t len;
+
+        explicit Token(TokenType t, std::size_t s) : type(t), start(s), len(0) {}
+        explicit Token(TokenType t, std::size_t s, std::size_t l) : type(t), start(s), len(l) {}
+
+        constexpr auto operator<=>(const Token&) const = default;
+    };
+
+    // Hand-written lexer. Each call to operator() lexes one token starting at `playhead`,
+    // which is a member and is not reset between calls.
+    struct Lexer {
+        std::vector tokens = {};
+        std::size_t playhead = 0;
+        std::string_view src;  // temp set in operator()
+
+        // Reserved words mapped to their token types; consulted by lexIdentKeyword().
+        const std::unordered_map keywords = {
+            {"alias"sv, TokenType::ALIAS},
+            {"case"sv, TokenType::CASE},
+            {"class"sv, TokenType::CLASS},
+            {"default"sv, TokenType::DEFAULT},
+            {"enum"sv, TokenType::ENUM},
+            {"false"sv, TokenType::FALSE},
+            {"for"sv, TokenType::FOR},
+            {"func"sv, TokenType::FUNC},
+            {"if"sv, TokenType::IF},
+            {"implements"sv, TokenType::IMPLEMENTS},
+            {"interface"sv, TokenType::INTERFACE},
+            {"let"sv, TokenType::LET},
+            {"mod"sv, TokenType::MOD},
+            {"return"sv, TokenType::RETURN},
+            {"static"sv, TokenType::STATIC},
+            {"switch"sv, TokenType::SWITCH},
+            {"true"sv, TokenType::TRUE},
+            {"type"sv, TokenType::TYPE},
+        };
+
+        // Names that lexIdentKeyword() reports as TYPE_LITERAL.
+        std::unordered_map types = {};
+
+        explicit Lexer() {}
+        auto skipWhitespace() -> void;
+        [[nodiscard]] auto isNumeric() -> bool;
+        [[nodiscard]] auto isLetter() -> bool;
+        [[nodiscard]] auto lexNumeric() -> token_result_t;
+        [[nodiscard]] auto lexSingle(const TokenType) -> token_result_t;
+        [[nodiscard]] auto lexDouble(const char, const TokenType, const TokenType)
+            -> token_result_t;
+        [[nodiscard]] auto lexChar() -> token_result_t;
+        [[nodiscard]] auto lexString() -> token_result_t;
+        [[nodiscard]] auto lexIdentKeyword() -> token_result_t;
+        [[nodiscard]] auto operator()(std::string_view src) -> token_result_t;
+    };
+
+    [[nodiscard]] auto between(int, int, int) -> bool;
+}  // namespace Winter
+
+#endif  // WINTER_LEXER_H
diff --git a/src/main.cpp b/src/main.cpp
index 8614257..7dcf891 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -24,7 +24,8 @@ constexpr auto default_output() -> int {
 }
 
 auto main(int argc, char* argv[]) -> int {
-    auto args = std::vector(std::from_range, std::span {argv, argc});
+    auto args = std::vector(
+        std::from_range, std::span {argv, static_cast(argc)});
 
     bool enable_debug = false;
     std::string file = "";
diff --git a/subprojects/willow.wrap b/subprojects/willow.wrap
new file mode 100644
index 0000000..b57f8d3
--- /dev/null
+++ b/subprojects/willow.wrap
@@ -0,0 +1,7 @@
+[wrap-git]
+url = https://github.com/ttibsi/willow
+revision = HEAD
+method = cmake
+
+[provide]
+willow = willow_dep
diff --git a/tests/lexer_test.h b/tests/lexer_test.h
new file mode 100644
index 0000000..69645fc
--- /dev/null
+++ b/tests/lexer_test.h
@@ -0,0 +1,195 @@
+#include
+#include
+
+#include
+
+#include "lexer.h"
+
+using namespace std::literals::string_view_literals;
+
+// skipWhitespace must stop on the first non-whitespace character.
+constexpr auto test_skipWhitespace([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    L.src = "   foo"sv;  // three leading spaces
+    L.skipWhitespace();
+
+    if (L.playhead != 3) {
+        test->alert("Playhead = " + std::to_string(L.playhead));
+        return 1;
+    }
+
+    return 0;
+}
+
+// between() is inclusive at both ends.
+constexpr auto test_between([[maybe_unused]] Willow::Test* test) -> int {
+    if (!Winter::between(1, 10, 5)) { return 1; }
+    if (Winter::between(1, 10, 20)) { return 2; }
+    if (!Winter::between(1, 10, 10)) { return 3; }
+
+    return 0;
+}
+
+// isNumeric accepts digits and '.', and rejects letters.
+constexpr auto test_isNumeric([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    L.src = "0"sv;
+    if (!L.isNumeric()) { return 1; }
+
+    L.src = "a"sv;
+    if (L.isNumeric()) { return 2; }
+
+    L.src = "."sv;
+    if (!L.isNumeric()) { return 3; }
+
+    return 0;
+}
+
+// isLetter accepts digits and ASCII letters, and rejects a space.
+constexpr auto test_isLetter([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    L.src = "0"sv;
+    if (!L.isLetter()) { return 1; }
+
+    L.src = "A"sv;
+    if (!L.isLetter()) { return 2; }
+
+    L.src = "a"sv;
+    if (!L.isLetter()) { return 3; }
+
+    L.src = " "sv;
+    if (L.isLetter()) { return 4; }
+
+    return 0;
+}
+
+// lexNumeric consumes the whole run of digits as one NUM_LITERAL.
+constexpr auto test_lexNumeric([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    L.src = "012"sv;
+    const auto result = L.lexNumeric();
+
+    if (!result.has_value()) { return 1; }
+    if (result.value().type != Winter::TokenType::NUM_LITERAL) { return 2; }
+    if (result.value().start != 0) { return 3; }
+    if (result.value().len != 3) {
+        test->alert("length = " + std::to_string(result.value().len));
+        return 4;
+    }
+
+    return 0;
+}
+
+// lexSingle emits a one-character token and advances the playhead by one.
+constexpr auto test_lexSingle([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    const auto result = L.lexSingle(Winter::TokenType::LPAREN);
+
+    if (!result.has_value()) { return 1; }
+    if (result.value().type != Winter::TokenType::LPAREN) { return 2; }
+    if (result.value().start != 0) { return 3; }
+    if (result.value().len != 1) { return 4; }
+    if (L.playhead != 1) { return 5; }
+
+    return 0;
+}
+
+// lexDouble merges `<` followed by `=` into a single LESS_EQ token.
+constexpr auto test_lexDouble([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    L.src = "<="sv;
+    const auto result = L.lexDouble('=', Winter::TokenType::LESS, Winter::TokenType::LESS_EQ);
+
+    if (!result.has_value()) { return 1; }
+    if (result.value().type != Winter::TokenType::LESS_EQ) { return 2; }
+    if (result.value().start != 0) { return 3; }
+    if (result.value().len != 2) { return 4; }
+    if (L.playhead != 2) { return 5; }
+
+    return 0;
+}
+
+// lexChar accepts 'v' and rejects a literal with no closing quote.
+constexpr auto test_lexChar([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    // Valid char
+    L.src = "'v'"sv;
+    const auto result = L.lexChar();
+
+    if (!result.has_value()) { return 1; }
+    if (result.value().type != Winter::TokenType::CHAR_LITERAL) { return 2; }
+    if (result.value().start != 0) { return 3; }
+    if (result.value().len != 3) { return 4; }
+    if (L.playhead != 3) { return 5; }
+
+    // Invalid (no closing quote); rewind so the new source is lexed from its start
+    L.src = "'v"sv;
+    L.playhead = 0;
+    const auto result2 = L.lexChar();
+    if (result2.has_value()) { return 6; }
+
+    return 0;
+}
+
+// lexString includes both quotes in the token and rejects an unterminated string.
+constexpr auto test_lexString([[maybe_unused]] Willow::Test* test) -> int {
+    auto L = Winter::Lexer();
+    L.src = "\"foo bar\""sv;
+    const auto result = L.lexString();
+
+    if (!result.has_value()) { return 1; }
+    if (result.value().type != Winter::TokenType::STR_LITERAL) { return 2; }
+    if (result.value().start != 0) { return 3; }
+    if (result.value().len != 9) {
+        test->alert("String length: " + std::to_string(result.value().len));
+        return 4;
+    }
+    if (L.playhead != 9) { return 5; }
+
+    // Invalid (no closing quote); rewind and exercise lexString itself, not lexChar
+    L.src = "\"foo bar"sv;
+    L.playhead = 0;
+    const auto result2 = L.lexString();
+    if (result2.has_value()) { return 6; }
+
+    return 0;
+}
+
+constexpr auto test_lexIdentKeyword([[maybe_unused]] Willow::Test* test) -> int {
+    // A plain word lexes as IDENT; a reserved word lexes as its keyword token.
+    auto lexer = Winter::Lexer();
+    lexer.src = "foo"sv;
+    const auto ident = lexer.lexIdentKeyword();
+
+    if (!ident) { return 1; }
+    if (ident->type != Winter::TokenType::IDENT) { return 2; }
+    if (ident->start != 0) { return 3; }
+    if (ident->len != 3) { return 4; }
+    if (lexer.playhead != 3) { return 5; }
+
+    // Rewind and lex a keyword
+    lexer.playhead = 0;
+    lexer.src = "type"sv;
+    const auto keyword = lexer.lexIdentKeyword();
+
+    if (!keyword) { return 6; }
+    if (keyword->type != Winter::TokenType::TYPE) { return 7; }
+    if (keyword->start != 0) { return 8; }
+    if (keyword->len != 4) { return 9; }
+    if (lexer.playhead != 4) { return 10; }
+
+    return 0;
+}
+
+constexpr auto test_operator_funcCall([[maybe_unused]] Willow::Test* test) -> int {
+    auto lexer = Winter::Lexer();
+    const auto token = lexer("let");
+    // "let" must come back as a LET token at offset 0, length 3, with the playhead at 3.
+    if (!token) { return 1; }
+    if (token->type != Winter::TokenType::LET) { return 2; }
+    if (token->start != 0) { return 3; }
+    if (token->len != 3) { return 4; }
+    if (lexer.playhead != 3) { return 5; }
+
+    return 0;
+}
diff --git a/tests/test.cpp b/tests/test.cpp
new file mode 100644
index 0000000..bbbcd07
--- /dev/null
+++ b/tests/test.cpp
@@ -0,0 +1,30 @@
+#include
+#include
+
+#include "lexer_test.h"
+
+// Test-runner entry point: registers the lexer tests, then calls Willow::runSingleTest with
+// the first command-line argument when one is given, or Willow::runTests otherwise.
+auto main(int argc, char* argv[]) -> int {
+    Willow::PreCommitReporter reporter = {};
+
+    Willow::registerTests({
+        // lexer_test.h
+        {"skipWhitespace", test_skipWhitespace},
+        {"between", test_between},
+        {"isNumeric", test_isNumeric},
+        {"isLetter", test_isLetter},
+        {"lexNumeric", test_lexNumeric},
+        {"lexSingle", test_lexSingle},
+        {"lexDouble", test_lexDouble},
+        {"lexChar", test_lexChar},
+        {"lexString", test_lexString},
+        {"lexIdentKeyword", test_lexIdentKeyword},
+        {"operator()", test_operator_funcCall},
+    });
+
+    const bool run_everything = argc <= 1;
+    if (run_everything) { return Willow::runTests(reporter); }
+
+    return Willow::runSingleTest(std::string(argv[1]), reporter);
+}