Skip to content

Commit

Permalink
Merge pull request #33 from EdwardPalmer99/EdwardPalmer99/feature/cleanup-tokenizer
Browse files Browse the repository at this point in the history

Rewrites Token
  • Loading branch information
EdwardPalmer99 authored Jan 14, 2025
2 parents bd5e8a4 + 744a9bc commit 08ea124
Show file tree
Hide file tree
Showing 5 changed files with 99 additions and 104 deletions.
47 changes: 3 additions & 44 deletions src/lexer/EucleiaTokenizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,6 @@ Tokenizer Tokenizer::loadFromFile(const std::string &fpath)
}


std::string Token::description() const
{
switch (type)
{
case None:
return "None";
case Punctuation:
return "Punctuation";
case Keyword:
return "Keyword";
case Variable:
return "Variable";
case String:
return "String";
case Operator:
return "Operator";
case Int:
return "Int";
case Float:
return "Float";
case Bool:
return "Bool";
default:
return "Unknown";
}
}


Tokenizer::Tokenizer(const std::string fileString)
: InputStream(std::move(fileString))
{
Expand All @@ -62,7 +34,7 @@ void Tokenizer::generateTokens()
{
auto token = buildNextToken();

if (token.type != Token::None)
if (token.type != Token::EndOfFile)
{
// std::cout << token << std::endl;
_tokens.push(std::move(token));
Expand Down Expand Up @@ -92,11 +64,6 @@ Token Tokenizer::next()
return next;
}

bool Tokenizer::isDataTypeToken()
{
return Grammar::isDataType(peek().value);
}


Token Tokenizer::buildNextToken()
{
Expand Down Expand Up @@ -134,7 +101,7 @@ Token Tokenizer::buildNextToken()
}
else if (isEof())
{
return Token::blank();
return Token(Token::EndOfFile, "");
}
else
{
Expand Down Expand Up @@ -258,7 +225,7 @@ Token Tokenizer::readID()

std::string stringID(buffer.data());

return Token(isKeyword(stringID) ? Token::Keyword : Token::Variable, stringID);
return Token(Grammar::isKeyword(stringID) ? Token::Keyword : Token::Variable, stringID);
}


Expand All @@ -285,11 +252,3 @@ Token Tokenizer::readOperator()

return Token(Token::Operator, std::string(buffer.data()));
}


#pragma mark -

bool Tokenizer::isKeyword(const std::string &possibleKeyword) const
{
return Grammar::isKeyword(possibleKeyword);
}
63 changes: 6 additions & 57 deletions src/lexer/EucleiaTokenizer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,61 +5,16 @@
// Created by Edward on 18/01/2024.
//

#ifndef EucleiaTokenizer_hpp
#define EucleiaTokenizer_hpp

#pragma once
#include "EucleiaInputStream.hpp"
#include "Token.hpp"
#include <queue>
#include <set>
#include <string>

// TODO: - bang in a namespace.

struct Token
{
enum TokenType
{
None,
Punctuation,
Keyword,
Variable,
String,
Operator,
Int,
Float,
Bool
};

Token(TokenType _type, std::string &&_value)
: type{_type}, value{_value}
{
}
Token(TokenType _type, std::string &_value)
: type{_type}, value{_value}
{
}

static Token blank()
{
return Token(None, "");
}

std::string description() const;

TokenType type;
std::string value;
};


inline std::ostream &operator<<(std::ostream &out, const Token &token)
{
return (out << std::string("(" + token.description() + ", " + token.value + ")"));
}


class Tokenizer : public InputStream
{
public:
public:
Tokenizer() = delete;
Tokenizer(const std::string fileString);
~Tokenizer() = default;
Expand All @@ -75,9 +30,7 @@ class Tokenizer : public InputStream
return _tokens.empty();
}

bool isDataTypeToken();

protected:
protected:
void skipComment();
void skipWhitespace();

Expand All @@ -88,14 +41,10 @@ class Tokenizer : public InputStream
Token readOperator();
Token readPunctuation();

bool isKeyword(const std::string &possibleKeyword) const;

void generateTokens();

Token buildNextToken();

private:
std::set<std::string> _allowedKeywords;
private:
std::queue<Token> _tokens;
};

#endif /* EucleiaTokenzier_hpp */
48 changes: 48 additions & 0 deletions src/lexer/Token.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/**
* @file Token.cpp
* @author Edward Palmer
* @date 2025-01-14
*
* @copyright Copyright (c) 2025
*
*/

#include "Token.hpp"
#include "Exceptions.hpp"

/**
 * Constructs a token of the given category, taking ownership of its text.
 * The value is moved into the (const) member, so callers may pass temporaries
 * cheaply.
 */
Token::Token(TokenType tokenType, std::string tokenText)
    : type{tokenType}, value{std::move(tokenText)}
{
}


std::string Token::print() const
{
return typeName() + ": " + value;
}


/**
 * Maps this token's enum type to its lowercase human-readable name.
 * Feeds print(), and therefore parser diagnostics such as
 * "unexpected token: ...".
 *
 * @throws (via ThrowException) if the stored type is not a recognised
 *         TokenType value — defensive guard should the enum grow.
 */
std::string Token::typeName() const
{
    switch (type)
    {
    case EndOfFile:
        return "end-of-file";
    case Punctuation:
        return "punctuation";
    case Keyword:
        return "keyword";
    case Variable:
        return "variable";
    case String:
        return "string";
    case Operator:
        // Fix: previously returned "other", inconsistent with every other
        // case (each returns the lowercase name of its enumerator).
        return "operator";
    case Int:
        return "int";
    case Float:
        return "float";
    default:
        // Unreachable for well-formed tokens; std::to_string(type) relies on
        // the enum's implicit conversion to int.
        ThrowException("unexpected token type: " + std::to_string(type));
    }
}
38 changes: 38 additions & 0 deletions src/lexer/Token.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* @file Token.hpp
* @author Edward Palmer
* @date 2025-01-14
*
* @copyright Copyright (c) 2025
*
*/

#pragma once
#include <string>

/**
 * A single lexical token: an immutable (type, value) pair produced by the
 * tokenizer and consumed by the parser.
 */
class Token
{
public:
    /// Categories a token can belong to.
    enum TokenType
    {
        EndOfFile,   ///< End of the input stream (tokenizer returns this when input is exhausted).
        Punctuation,
        Keyword,
        Variable,
        String,
        Operator,
        Int,
        Float
    };

    Token() = delete; // A token is meaningless without a type and value.

    /// Constructs a token; the value string is moved into the member.
    Token(TokenType type, std::string value);

    /// Returns a human-readable "<type-name>: <value>" description (for diagnostics).
    std::string print() const;

    // Both members are const: a token is immutable once constructed.
    const TokenType type;
    const std::string value;

private:
    /// Lowercase human-readable name for `type`; helper for print().
    std::string typeName() const;
};
7 changes: 4 additions & 3 deletions src/parser/EucleiaParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "EucleiaParser.hpp"
#include "EucleiaModules.hpp"
#include "Exceptions.hpp"
#include "Grammar.hpp"
#include "ObjectTypes.hpp"
#include "TestModule.hpp"
#include <assert.h>
Expand Down Expand Up @@ -46,7 +47,7 @@ FileNode *Parser::buildAST()
{
std::vector<BaseNode *> nodes;

while (!tokenizer.empty() && peekToken().type != Token::None)
while (!tokenizer.empty() && peekToken().type != Token::EndOfFile)
{
auto node = parseExpression();

Expand Down Expand Up @@ -954,7 +955,7 @@ bool Parser::isKeyword(const std::string &keyword)

bool Parser::isDataTypeKeyword()
{
return (tokenizer.isDataTypeToken());
return (Grammar::isDataType(peekToken().value));
}


Expand Down Expand Up @@ -1015,5 +1016,5 @@ void Parser::unexpectedToken()
{
Token &token = peekToken();

ThrowException("unexpected token of type " + token.description() + " and value " + token.value);
ThrowException("unexpected token: " + token.print());
}

0 comments on commit 08ea124

Please sign in to comment.