From 2e81ef8a74cabf965099145ab4972b608aae5199 Mon Sep 17 00:00:00 2001
From: Edward Palmer <{username}@users.noreply.github.com>
Date: Tue, 14 Jan 2025 20:38:41 +0000
Subject: [PATCH 1/4] Switch to using Grammar static methods.

---
 src/lexer/EucleiaTokenizer.cpp | 15 +--------------
 src/lexer/EucleiaTokenizer.hpp | 12 ++++--------
 src/parser/EucleiaParser.cpp   |  3 ++-
 3 files changed, 7 insertions(+), 23 deletions(-)

diff --git a/src/lexer/EucleiaTokenizer.cpp b/src/lexer/EucleiaTokenizer.cpp
index 381b3d6..cc0dd40 100644
--- a/src/lexer/EucleiaTokenizer.cpp
+++ b/src/lexer/EucleiaTokenizer.cpp
@@ -92,11 +92,6 @@ Token Tokenizer::next()
     return next;
 }
 
-bool Tokenizer::isDataTypeToken()
-{
-    return Grammar::isDataType(peek().value);
-}
-
 
 Token Tokenizer::buildNextToken()
 {
@@ -258,7 +253,7 @@ Token Tokenizer::readID()
 
     std::string stringID(buffer.data());
 
-    return Token(isKeyword(stringID) ? Token::Keyword : Token::Variable, stringID);
+    return Token(Grammar::isKeyword(stringID) ? Token::Keyword : Token::Variable, stringID);
 }
 
 
@@ -285,11 +280,3 @@ Token Tokenizer::readOperator()
 
     return Token(Token::Operator, std::string(buffer.data()));
 }
-
-
-#pragma mark -
-
-bool Tokenizer::isKeyword(const std::string &possibleKeyword) const
-{
-    return Grammar::isKeyword(possibleKeyword);
-}
diff --git a/src/lexer/EucleiaTokenizer.hpp b/src/lexer/EucleiaTokenizer.hpp
index 9bc262f..3dd44ad 100644
--- a/src/lexer/EucleiaTokenizer.hpp
+++ b/src/lexer/EucleiaTokenizer.hpp
@@ -59,7 +59,7 @@ inline std::ostream &operator<<(std::ostream &out, const Token &token)
 
 class Tokenizer : public InputStream
 {
-  public:
+public:
     Tokenizer() = delete;
     Tokenizer(const std::string fileString);
     ~Tokenizer() = default;
@@ -75,9 +75,7 @@ class Tokenizer : public InputStream
         return _tokens.empty();
     }
 
-    bool isDataTypeToken();
-
-  protected:
+protected:
     void skipComment();
     void skipWhitespace();
 
@@ -88,13 +86,11 @@ class Tokenizer : public InputStream
     Token readOperator();
     Token readPunctuation();
 
-    bool isKeyword(const std::string &possibleKeyword) const;
-
     void generateTokens();
+
     Token buildNextToken();
 
-  private:
-    std::set<std::string> _allowedKeywords;
+private:
     std::queue<Token> _tokens;
 };
 
diff --git a/src/parser/EucleiaParser.cpp b/src/parser/EucleiaParser.cpp
index 04cf73a..1e2e3c8 100644
--- a/src/parser/EucleiaParser.cpp
+++ b/src/parser/EucleiaParser.cpp
@@ -8,6 +8,7 @@
 #include "EucleiaParser.hpp"
 #include "EucleiaModules.hpp"
 #include "Exceptions.hpp"
+#include "Grammar.hpp"
 #include "ObjectTypes.hpp"
 #include "TestModule.hpp"
 #include
@@ -954,7 +955,7 @@ bool Parser::isKeyword(const std::string &keyword)
 
 bool Parser::isDataTypeKeyword()
 {
-    return (tokenizer.isDataTypeToken());
+    return (Grammar::isDataType(peekToken().value));
 }
 
 

From 04be1d1c7f730a2013377897fc7d86e163dcbfb2 Mon Sep 17 00:00:00 2001
From: Edward Palmer <{username}@users.noreply.github.com>
Date: Tue, 14 Jan 2025 20:39:51 +0000
Subject: [PATCH 2/4] Pragma once.

---
 src/lexer/EucleiaTokenizer.hpp | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/lexer/EucleiaTokenizer.hpp b/src/lexer/EucleiaTokenizer.hpp
index 3dd44ad..0fd2288 100644
--- a/src/lexer/EucleiaTokenizer.hpp
+++ b/src/lexer/EucleiaTokenizer.hpp
@@ -5,9 +5,7 @@
 //  Created by Edward on 18/01/2024.
 //
 
-#ifndef EucleiaTokenizer_hpp
-#define EucleiaTokenizer_hpp
-
+#pragma once
 #include "EucleiaInputStream.hpp"
 #include
 #include
@@ -93,5 +91,3 @@ class Tokenizer : public InputStream
 private:
     std::queue<Token> _tokens;
 };
-
-#endif /* EucleiaTokenzier_hpp */

From 4f8745e547c2093277ee3d24bc9f7b265fb62c73 Mon Sep 17 00:00:00 2001
From: Edward Palmer <{username}@users.noreply.github.com>
Date: Tue, 14 Jan 2025 20:43:13 +0000
Subject: [PATCH 3/4] Moves Token into separate file.

---
 src/lexer/EucleiaTokenizer.hpp | 45 +---------------------------
 src/lexer/Token.cpp            | 10 +++++++
 src/lexer/Token.hpp            | 55 ++++++++++++++++++++++++++++++++++
 3 files changed, 66 insertions(+), 44 deletions(-)
 create mode 100644 src/lexer/Token.cpp
 create mode 100644 src/lexer/Token.hpp

diff --git a/src/lexer/EucleiaTokenizer.hpp b/src/lexer/EucleiaTokenizer.hpp
index 0fd2288..9fe9f3c 100644
--- a/src/lexer/EucleiaTokenizer.hpp
+++ b/src/lexer/EucleiaTokenizer.hpp
@@ -7,54 +7,11 @@
 
 #pragma once
 #include "EucleiaInputStream.hpp"
+#include "Token.hpp"
 #include
 #include
 #include
 
-// TODO: - bang in a namespace.
-
-struct Token
-{
-    enum TokenType
-    {
-        None,
-        Punctuation,
-        Keyword,
-        Variable,
-        String,
-        Operator,
-        Int,
-        Float,
-        Bool
-    };
-
-    Token(TokenType _type, std::string &&_value)
-        : type{_type}, value{_value}
-    {
-    }
-    Token(TokenType _type, std::string &_value)
-        : type{_type}, value{_value}
-    {
-    }
-
-    static Token blank()
-    {
-        return Token(None, "");
-    }
-
-    std::string description() const;
-
-    TokenType type;
-    std::string value;
-};
-
-
-inline std::ostream &operator<<(std::ostream &out, const Token &token)
-{
-    return (out << std::string("(" + token.description() + ", " + token.value + ")"));
-}
-
-
 class Tokenizer : public InputStream
 {
 public:
diff --git a/src/lexer/Token.cpp b/src/lexer/Token.cpp
new file mode 100644
index 0000000..7f3bac2
--- /dev/null
+++ b/src/lexer/Token.cpp
@@ -0,0 +1,10 @@
+/**
+ * @file Token.cpp
+ * @author Edward Palmer
+ * @date 2025-01-14
+ *
+ * @copyright Copyright (c) 2025
+ *
+ */
+
+#include "Token.hpp"
\ No newline at end of file
diff --git a/src/lexer/Token.hpp b/src/lexer/Token.hpp
new file mode 100644
index 0000000..fabefba
--- /dev/null
+++ b/src/lexer/Token.hpp
@@ -0,0 +1,55 @@
+/**
+ * @file Token.hpp
+ * @author Edward Palmer
+ * @date 2025-01-14
+ *
+ * @copyright Copyright (c) 2025
+ *
+ */
+
+#pragma once
+#include
+#include
+
+// TODO: - bang in a namespace.
+
+struct Token
+{
+    enum TokenType
+    {
+        None,
+        Punctuation,
+        Keyword,
+        Variable,
+        String,
+        Operator,
+        Int,
+        Float,
+        Bool
+    };
+
+    Token(TokenType _type, std::string &&_value)
+        : type{_type}, value{_value}
+    {
+    }
+    Token(TokenType _type, std::string &_value)
+        : type{_type}, value{_value}
+    {
+    }
+
+    static Token blank()
+    {
+        return Token(None, "");
+    }
+
+    std::string description() const;
+
+    TokenType type;
+    std::string value;
+};
+
+
+inline std::ostream &operator<<(std::ostream &out, const Token &token)
+{
+    return (out << std::string("(" + token.description() + ", " + token.value + ")"));
+}

From 744a9bc2197e03cf7bb0c46b608d24efccf27fca Mon Sep 17 00:00:00 2001
From: Edward Palmer <{username}@users.noreply.github.com>
Date: Tue, 14 Jan 2025 21:21:19 +0000
Subject: [PATCH 4/4] Cleanup; using EndOfFile token type.
---
 src/lexer/EucleiaTokenizer.cpp | 32 ++------------------------
 src/lexer/Token.cpp            | 40 +++++++++++++++++++++++++++++++-
 src/lexer/Token.hpp            | 39 ++++++++++-----------------------
 src/parser/EucleiaParser.cpp   |  4 ++--
 4 files changed, 54 insertions(+), 61 deletions(-)

diff --git a/src/lexer/EucleiaTokenizer.cpp b/src/lexer/EucleiaTokenizer.cpp
index cc0dd40..ef38360 100644
--- a/src/lexer/EucleiaTokenizer.cpp
+++ b/src/lexer/EucleiaTokenizer.cpp
@@ -21,34 +21,6 @@ Tokenizer Tokenizer::loadFromFile(const std::string &fpath)
 }
 
 
-std::string Token::description() const
-{
-    switch (type)
-    {
-        case None:
-            return "None";
-        case Punctuation:
-            return "Punctuation";
-        case Keyword:
-            return "Keyword";
-        case Variable:
-            return "Variable";
-        case String:
-            return "String";
-        case Operator:
-            return "Operator";
-        case Int:
-            return "Int";
-        case Float:
-            return "Float";
-        case Bool:
-            return "Bool";
-        default:
-            return "Unknown";
-    }
-}
-
-
 Tokenizer::Tokenizer(const std::string fileString)
     : InputStream(std::move(fileString))
 {
@@ -62,7 +34,7 @@ void Tokenizer::generateTokens()
     {
         auto token = buildNextToken();
 
-        if (token.type != Token::None)
+        if (token.type != Token::EndOfFile)
         {
             // std::cout << token << std::endl;
             _tokens.push(std::move(token));
@@ -129,7 +101,7 @@ Token Tokenizer::buildNextToken()
     }
     else if (isEof())
    {
-        return Token::blank();
+        return Token(Token::EndOfFile, "");
     }
     else
     {
diff --git a/src/lexer/Token.cpp b/src/lexer/Token.cpp
index 7f3bac2..ffa196e 100644
--- a/src/lexer/Token.cpp
+++ b/src/lexer/Token.cpp
@@ -7,4 +7,42 @@
  *
  */
 
-#include "Token.hpp"
\ No newline at end of file
+#include "Token.hpp"
+#include "Exceptions.hpp"
+
+Token::Token(TokenType type_, std::string value_)
+    : type{type_}, value{std::move(value_)}
+{
+}
+
+
+std::string Token::print() const
+{
+    return typeName() + ": " + value;
+}
+
+
+std::string Token::typeName() const
+{
+    switch (type)
+    {
+        case EndOfFile:
+            return "end-of-file";
+        case Punctuation:
+            return "punctuation";
+        case Keyword:
+            return "keyword";
+        case Variable:
+            return "variable";
+        case String:
+            return "string";
+        case Operator:
+            return "other";
+        case Int:
+            return "int";
+        case Float:
+            return "float";
+        default:
+            ThrowException("unexpected token type: " + std::to_string(type));
+    }
+}
\ No newline at end of file
diff --git a/src/lexer/Token.hpp b/src/lexer/Token.hpp
index fabefba..e71e1c8 100644
--- a/src/lexer/Token.hpp
+++ b/src/lexer/Token.hpp
@@ -8,48 +8,31 @@
  */
 
 #pragma once
-#include
 #include
 
-// TODO: - bang in a namespace.
-
-struct Token
+class Token
 {
+public:
     enum TokenType
     {
-        None,
+        EndOfFile,
         Punctuation,
         Keyword,
         Variable,
         String,
         Operator,
         Int,
-        Float,
-        Bool
+        Float
     };
 
-    Token(TokenType _type, std::string &&_value)
-        : type{_type}, value{_value}
-    {
-    }
-    Token(TokenType _type, std::string &_value)
-        : type{_type}, value{_value}
-    {
-    }
+    Token() = delete;
+    Token(TokenType type, std::string value);
 
-    static Token blank()
-    {
-        return Token(None, "");
-    }
+    std::string print() const;
 
-    std::string description() const;
+    const TokenType type;
+    const std::string value;
 
-    TokenType type;
-    std::string value;
+private:
+    std::string typeName() const;
 };
-
-
-inline std::ostream &operator<<(std::ostream &out, const Token &token)
-{
-    return (out << std::string("(" + token.description() + ", " + token.value + ")"));
-}
diff --git a/src/parser/EucleiaParser.cpp b/src/parser/EucleiaParser.cpp
index 1e2e3c8..49bf5a1 100644
--- a/src/parser/EucleiaParser.cpp
+++ b/src/parser/EucleiaParser.cpp
@@ -47,7 +47,7 @@ FileNode *Parser::buildAST()
 {
     std::vector nodes;
 
-    while (!tokenizer.empty() && peekToken().type != Token::None)
+    while (!tokenizer.empty() && peekToken().type != Token::EndOfFile)
     {
         auto node = parseExpression();
 
@@ -1016,5 +1016,5 @@ void Parser::unexpectedToken()
 {
     Token &token = peekToken();
 
-    ThrowException("unexpected token of type " + token.description() + " and value " + token.value);
+    ThrowException("unexpected token: " + token.print());
 }
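
Illustrative sketch, not part of the patch series: the snippet below shows how the refactored pieces are meant to fit together after PATCH 4/4, using only what the diffs introduce (the static Grammar::isKeyword()/Grammar::isDataType() helpers declared in Grammar.hpp, and the new Token(TokenType, std::string) constructor plus Token::print()). The classifyWord() helper and the main() driver are hypothetical and exist purely for illustration; they are not part of the Eucleia sources.

#include "Grammar.hpp"
#include "Token.hpp"
#include <iostream>
#include <string>

// Hypothetical helper mirroring what Tokenizer::readID() does after PATCH 1/4:
// keyword detection goes through the static Grammar helper rather than the
// removed Tokenizer::isKeyword() member.
static Token classifyWord(const std::string &word)
{
    return Token(Grammar::isKeyword(word) ? Token::Keyword : Token::Variable, word);
}

int main()
{
    Token token = classifyWord("while");

    // Token::print() replaces the old description()/operator<< pair; if the
    // grammar registers "while" as a keyword, this prints "keyword: while".
    std::cout << token.print() << std::endl;

    // Parser::isDataTypeKeyword() now calls Grammar::isDataType() directly on
    // the peeked token's value; the same helper can be queried standalone.
    std::cout << std::boolalpha << Grammar::isDataType(token.value) << std::endl;

    return 0;
}

Compiling the sketch would require linking against the Eucleia lexer sources so that Grammar and Token are available; it is only intended to illustrate the post-refactor call sites.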