diff --git a/src/lexer/EucleiaTokenizer.cpp b/src/lexer/EucleiaTokenizer.cpp index 381b3d6..ef38360 100644 --- a/src/lexer/EucleiaTokenizer.cpp +++ b/src/lexer/EucleiaTokenizer.cpp @@ -21,34 +21,6 @@ Tokenizer Tokenizer::loadFromFile(const std::string &fpath) } -std::string Token::description() const -{ - switch (type) - { - case None: - return "None"; - case Punctuation: - return "Punctuation"; - case Keyword: - return "Keyword"; - case Variable: - return "Variable"; - case String: - return "String"; - case Operator: - return "Operator"; - case Int: - return "Int"; - case Float: - return "Float"; - case Bool: - return "Bool"; - default: - return "Unknown"; - } -} - - Tokenizer::Tokenizer(const std::string fileString) : InputStream(std::move(fileString)) { @@ -62,7 +34,7 @@ void Tokenizer::generateTokens() { auto token = buildNextToken(); - if (token.type != Token::None) + if (token.type != Token::EndOfFile) { // std::cout << token << std::endl; _tokens.push(std::move(token)); @@ -92,11 +64,6 @@ Token Tokenizer::next() return next; } -bool Tokenizer::isDataTypeToken() -{ - return Grammar::isDataType(peek().value); -} - Token Tokenizer::buildNextToken() { @@ -134,7 +101,7 @@ Token Tokenizer::buildNextToken() } else if (isEof()) { - return Token::blank(); + return Token(Token::EndOfFile, ""); } else { @@ -258,7 +225,7 @@ Token Tokenizer::readID() std::string stringID(buffer.data()); - return Token(isKeyword(stringID) ? Token::Keyword : Token::Variable, stringID); + return Token(Grammar::isKeyword(stringID) ? 
Token::Keyword : Token::Variable, stringID); } @@ -285,11 +252,3 @@ Token Tokenizer::readOperator() return Token(Token::Operator, std::string(buffer.data())); } - - -#pragma mark - - -bool Tokenizer::isKeyword(const std::string &possibleKeyword) const -{ - return Grammar::isKeyword(possibleKeyword); -} diff --git a/src/lexer/EucleiaTokenizer.hpp b/src/lexer/EucleiaTokenizer.hpp index 9bc262f..9fe9f3c 100644 --- a/src/lexer/EucleiaTokenizer.hpp +++ b/src/lexer/EucleiaTokenizer.hpp @@ -5,61 +5,16 @@ // Created by Edward on 18/01/2024. // -#ifndef EucleiaTokenizer_hpp -#define EucleiaTokenizer_hpp - +#pragma once #include "EucleiaInputStream.hpp" +#include "Token.hpp" #include #include #include -// TODO: - bang in a namespace. - -struct Token -{ - enum TokenType - { - None, - Punctuation, - Keyword, - Variable, - String, - Operator, - Int, - Float, - Bool - }; - - Token(TokenType _type, std::string &&_value) - : type{_type}, value{_value} - { - } - Token(TokenType _type, std::string &_value) - : type{_type}, value{_value} - { - } - - static Token blank() - { - return Token(None, ""); - } - - std::string description() const; - - TokenType type; - std::string value; -}; - - -inline std::ostream &operator<<(std::ostream &out, const Token &token) -{ - return (out << std::string("(" + token.description() + ", " + token.value + ")")); -} - - class Tokenizer : public InputStream { - public: +public: Tokenizer() = delete; Tokenizer(const std::string fileString); ~Tokenizer() = default; @@ -75,9 +30,7 @@ class Tokenizer : public InputStream return _tokens.empty(); } - bool isDataTypeToken(); - - protected: +protected: void skipComment(); void skipWhitespace(); @@ -88,14 +41,10 @@ class Tokenizer : public InputStream Token readOperator(); Token readPunctuation(); - bool isKeyword(const std::string &possibleKeyword) const; - void generateTokens(); + Token buildNextToken(); - private: - std::set _allowedKeywords; +private: std::queue _tokens; }; - -#endif /* EucleiaTokenzier_hpp 
*/ diff --git a/src/lexer/Token.cpp b/src/lexer/Token.cpp new file mode 100644 index 0000000..ffa196e --- /dev/null +++ b/src/lexer/Token.cpp @@ -0,0 +1,48 @@ +/** + * @file Token.cpp + * @author Edward Palmer + * @date 2025-01-14 + * + * @copyright Copyright (c) 2025 + * + */ + +#include "Token.hpp" +#include "Exceptions.hpp" + +Token::Token(TokenType type_, std::string value_) + : type{type_}, value{std::move(value_)} +{ +} + + +std::string Token::print() const +{ + return typeName() + ": " + value; +} + + +std::string Token::typeName() const +{ + switch (type) + { + case EndOfFile: + return "end-of-file"; + case Punctuation: + return "punctuation"; + case Keyword: + return "keyword"; + case Variable: + return "variable"; + case String: + return "string"; + case Operator: + return "operator"; + case Int: + return "int"; + case Float: + return "float"; + default: + ThrowException("unexpected token type: " + std::to_string(type)); + } +} \ No newline at end of file diff --git a/src/lexer/Token.hpp b/src/lexer/Token.hpp new file mode 100644 index 0000000..e71e1c8 --- /dev/null +++ b/src/lexer/Token.hpp @@ -0,0 +1,38 @@ +/** + * @file Token.hpp + * @author Edward Palmer + * @date 2025-01-14 + * + * @copyright Copyright (c) 2025 + * + */ + +#pragma once +#include <string> + +class Token +{ +public: + enum TokenType + { + EndOfFile, + Punctuation, + Keyword, + Variable, + String, + Operator, + Int, + Float + }; + + Token() = delete; + Token(TokenType type, std::string value); + + std::string print() const; + + const TokenType type; + const std::string value; + +private: + std::string typeName() const; +}; diff --git a/src/parser/EucleiaParser.cpp b/src/parser/EucleiaParser.cpp index 04cf73a..49bf5a1 100644 --- a/src/parser/EucleiaParser.cpp +++ b/src/parser/EucleiaParser.cpp @@ -8,6 +8,7 @@ #include "EucleiaParser.hpp" #include "EucleiaModules.hpp" #include "Exceptions.hpp" +#include "Grammar.hpp" #include "ObjectTypes.hpp" #include "TestModule.hpp" #include @@ -46,7 +47,7 @@ 
FileNode *Parser::buildAST() { std::vector nodes; - while (!tokenizer.empty() && peekToken().type != Token::None) + while (!tokenizer.empty() && peekToken().type != Token::EndOfFile) { auto node = parseExpression(); @@ -954,7 +955,7 @@ bool Parser::isKeyword(const std::string &keyword) bool Parser::isDataTypeKeyword() { - return (tokenizer.isDataTypeToken()); + return (Grammar::isDataType(peekToken().value)); } @@ -1015,5 +1016,5 @@ void Parser::unexpectedToken() { Token &token = peekToken(); - ThrowException("unexpected token of type " + token.description() + " and value " + token.value); + ThrowException("unexpected token: " + token.print()); }