diff --git a/compiler/include/compiler/ast/node_type.hpp b/compiler/include/compiler/ast/node_type.hpp index 465a5072..ad199cdb 100644 --- a/compiler/include/compiler/ast/node_type.hpp +++ b/compiler/include/compiler/ast/node_type.hpp @@ -62,6 +62,8 @@ enum class NodeType { VariableDeclaration, VariableName, WhileStatement, + ListStatement, + ListAccessor, }; } // namespace ast diff --git a/compiler/include/compiler/ast/types.hpp b/compiler/include/compiler/ast/types.hpp index 18f8262b..2325b20e 100644 --- a/compiler/include/compiler/ast/types.hpp +++ b/compiler/include/compiler/ast/types.hpp @@ -13,6 +13,7 @@ enum BuiltInTypes : TypeId { StrType = 3, NoneType = 4, BoolType = 5, + ListType = 6, BuiltInTypesCount, }; diff --git a/compiler/include/compiler/frontend/lexer/token_types.hpp b/compiler/include/compiler/frontend/lexer/token_types.hpp index 09e1a830..4a90ac24 100644 --- a/compiler/include/compiler/frontend/lexer/token_types.hpp +++ b/compiler/include/compiler/frontend/lexer/token_types.hpp @@ -25,6 +25,7 @@ enum class Keyword { True, None, False, + List, }; enum class Operator { diff --git a/compiler/lib/ast/node.cpp b/compiler/lib/ast/node.cpp index 4d6dde06..cf2fe665 100644 --- a/compiler/lib/ast/node.cpp +++ b/compiler/lib/ast/node.cpp @@ -4,6 +4,7 @@ #include #include "node_type.hpp" +#include "types.hpp" using namespace ast; @@ -84,6 +85,8 @@ const char *const typeIdToString(TypeId typeId) { return "BoolType"; case StrType: return "StrType"; + case ListType: + return "ListType"; case NoneType: return "NoneType"; } @@ -189,6 +192,12 @@ void Node::dump(std::ostream &stream, int depth) const { case NodeType::WhileStatement: stream << "WhileStatement\n"; break; + case NodeType::ListStatement: + stream << "ListStatement\n"; + break; + case NodeType::ListAccessor: + stream << "ListAccessor\n"; + break; default: stream << "Unknown\n"; } diff --git a/compiler/lib/frontend/lexer/lexer.cpp b/compiler/lib/frontend/lexer/lexer.cpp index 9e46a079..b3168794 100644 --- a/compiler/lib/frontend/lexer/lexer.cpp +++ b/compiler/lib/frontend/lexer/lexer.cpp @@ -24,6 +24,7 @@ std::unordered_map keywords = { {"or", Keyword::Or}, {"and", Keyword::And}, {"not", Keyword::Not}, {"in", Keyword::In}, {"True", Keyword::True}, {"None", Keyword::None}, + {"list", Keyword::List}, }; std::unordered_map operators = { diff --git a/compiler/lib/frontend/parser/parser.cpp b/compiler/lib/frontend/parser/parser.cpp index 28ee8926..40326b55 100644 --- a/compiler/lib/frontend/parser/parser.cpp +++ b/compiler/lib/frontend/parser/parser.cpp @@ -2,11 +2,17 @@ #include #include +#include +#include +#include #include +#include #include "compiler/ast/node.hpp" #include "compiler/ast/node_type.hpp" +#include "compiler/utils/error_buffer.hpp" +#include "lexer/token.hpp" #include "lexer/token_types.hpp" #include "parser/parser_context.hpp" #include "parser/parser_error.hpp" @@ -18,7 +24,7 @@ using namespace parser; namespace { -using SubExpression = std::variant; +using SubExpression = std::variant; bool isVariableDeclaration(const TokenIterator &tokenIter, const TokenIterator &tokenEnd) { if (tokenIter == tokenEnd || std::next(tokenIter) == tokenEnd || std::next(tokenIter, 2) == tokenEnd) @@ -42,6 +48,7 @@ enum class ExpressionTokenType { Operand, OpeningBrace, ClosingBrace, + RectBrace, }; OperationType getOperationType(const Token &token) { @@ -79,11 +86,11 @@ OperationType getOperationType(const Token &token) { return OperationType::Unknown; } -OperationType getOperationType(const ast::Node &node) { +OperationType getOperationType(const Node &node) { switch (node.type) { - case ast::NodeType::BinaryOperation: + case NodeType::BinaryOperation: return OperationType::Binary; - case ast::NodeType::UnaryOperation: + case NodeType::UnaryOperation: return OperationType::Unary; default: return OperationType::Unknown; @@ -99,13 +106,15 @@ ExpressionTokenType getExpressionTokenType(const Token &token) { return ExpressionTokenType::OpeningBrace; if (token.is(Operator::RightBrace)) return ExpressionTokenType::ClosingBrace; + if (token.is(Operator::RectLeftBrace) || token.is(Operator::RectRightBrace)) + return ExpressionTokenType::RectBrace; if (getOperationType(token) != OperationType::Unknown) return ExpressionTokenType::Operation; return ExpressionTokenType::Unknown; } ExpressionTokenType getExpressionTokenType(const SubExpression &subexpr) { - if (std::holds_alternative(subexpr)) + if (std::holds_alternative(subexpr)) return ExpressionTokenType::Operand; return getExpressionTokenType(*std::get(subexpr)); } @@ -195,8 +204,12 @@ bool isFunctionCall(const TokenIterator &tokenIter) { return tokenIter->type == TokenType::Identifier && std::next(tokenIter)->is(Operator::LeftBrace); } -void buildExpressionSubtree(std::stack postfixForm, ast::Node::Ptr root, ErrorBuffer &errors) { - ast::Node::Ptr currNode = root; +bool isListAccessor(const TokenIterator &tokenIter) { + return tokenIter->type == TokenType::Identifier && std::next(tokenIter)->is(Operator::RectLeftBrace); +} + +void buildExpressionSubtree(std::stack &postfixForm, const Node::Ptr &root, ErrorBuffer &errors) { + Node::Ptr currNode = root; while (!postfixForm.empty()) { const SubExpression &subexpr = postfixForm.top(); if (std::holds_alternative(subexpr)) { @@ -205,34 +218,32 @@ void buildExpressionSubtree(std::stack postfixForm, ast::Node::Pt if (expType == ExpressionTokenType::Operation) { OperationType opType = getOperationType(token); if (opType == OperationType::Binary) { - currNode = ParserContext::unshiftChildNode(currNode, ast::NodeType::BinaryOperation, token.ref); + currNode = ParserContext::unshiftChildNode(currNode, NodeType::BinaryOperation, token.ref); currNode->value = getBinaryOperation(token); } else if (opType == OperationType::Unary) { - currNode = ParserContext::unshiftChildNode(currNode, ast::NodeType::UnaryOperation, token.ref); + currNode = ParserContext::unshiftChildNode(currNode, NodeType::UnaryOperation, token.ref); } else { errors.push(token, "Unknown operator found in expression, it must be either unary or binary"); } } else if (expType == ExpressionTokenType::Operand) { if (token.type == TokenType::Identifier) { - ast::Node::Ptr node = - ParserContext::unshiftChildNode(currNode, ast::NodeType::VariableName, token.ref); + Node::Ptr node = ParserContext::unshiftChildNode(currNode, NodeType::VariableName, token.ref); node->value = token.id(); } else if (token.type == TokenType::IntegerLiteral) { - ast::Node::Ptr node = - ParserContext::unshiftChildNode(currNode, ast::NodeType::IntegerLiteralValue, token.ref); + Node::Ptr node = + ParserContext::unshiftChildNode(currNode, NodeType::IntegerLiteralValue, token.ref); node->value = std::atol(token.literal().c_str()); } else if (token.type == TokenType::FloatingPointLiteral) { - ast::Node::Ptr node = - ParserContext::unshiftChildNode(currNode, ast::NodeType::FloatingPointLiteralValue, token.ref); + Node::Ptr node = + ParserContext::unshiftChildNode(currNode, NodeType::FloatingPointLiteralValue, token.ref); node->value = std::stod(token.literal()); } else if (token.type == TokenType::StringLiteral) { - ast::Node::Ptr node = - ParserContext::unshiftChildNode(currNode, ast::NodeType::StringLiteralValue, token.ref); + Node::Ptr node = ParserContext::unshiftChildNode(currNode, NodeType::StringLiteralValue, token.ref); node->value = token.literal(); } else if (token.is(Keyword::False) || token.is(Keyword::True)) { - ast::Node::Ptr node = - ParserContext::unshiftChildNode(currNode, ast::NodeType::BooleanLiteralValue, token.ref); + Node::Ptr node = + ParserContext::unshiftChildNode(currNode, NodeType::BooleanLiteralValue, token.ref); if (token.is(Keyword::True)) node->value = true; else if (token.is(Keyword::False)) @@ -240,10 +251,11 @@ void buildExpressionSubtree(std::stack postfixForm, ast::Node::Pt } } } else { - ast::Node::Ptr funcCallNode = std::get(subexpr); - assert(funcCallNode->type == ast::NodeType::FunctionCall); - funcCallNode->parent = currNode; - currNode->children.push_front(funcCallNode); + // can be FunctionCall node and list ListAccessor + Node::Ptr callNode = std::get(subexpr); + assert(callNode->type == NodeType::FunctionCall or callNode->type == NodeType::ListAccessor); + callNode->parent = currNode; + currNode->children.push_front(callNode); } while (currNode->children.size() >= getOperandCount(getOperationType(*currNode))) currNode = currNode->parent; @@ -258,8 +270,8 @@ std::stack generatePostfixForm(TokenIterator tokenIterBegin, Toke for (auto tokenIter = tokenIterBegin; tokenIter != tokenIterEnd; tokenIter++) { const Token &token = *tokenIter; if (isFunctionCall(tokenIter)) { - ast::Node::Ptr funcCallNode = std::make_shared(ast::NodeType::FunctionCall); - auto node = ParserContext::pushChildNode(funcCallNode, ast::NodeType::FunctionName, token.ref); + Node::Ptr funcCallNode = std::make_shared(NodeType::FunctionCall); + auto node = ParserContext::pushChildNode(funcCallNode, NodeType::FunctionName, token.ref); node->value = token.id(); auto argsBegin = std::next(tokenIter); auto it = argsBegin; @@ -273,33 +285,53 @@ std::stack generatePostfixForm(TokenIterator tokenIterBegin, Toke } while (nestingLevel > 0); auto argsEnd = std::prev(it); if (std::distance(argsBegin, argsEnd) > 1) { - auto argsNode = ParserContext::pushChildNode(funcCallNode, ast::NodeType::FunctionArguments, token.ref); + auto argsNode = ParserContext::pushChildNode(funcCallNode, NodeType::FunctionArguments, token.ref); auto argBegin = std::next(argsBegin); for (auto argsIter = argBegin; argsIter != std::next(argsEnd); argsIter++) { if (!argsIter->is(Operator::Comma) && argsIter != argsEnd) continue; const Token &token = *argsIter; std::stack argPostfixForm = generatePostfixForm(argBegin, argsIter, errors); - auto exprNode = ParserContext::pushChildNode(argsNode, ast::NodeType::Expression, token.ref); + auto exprNode = ParserContext::pushChildNode(argsNode, NodeType::Expression, token.ref); buildExpressionSubtree(argPostfixForm, exprNode, errors); argBegin = std::next(argsIter); } } - postfixForm.push(funcCallNode); + postfixForm.emplace(funcCallNode); tokenIter = argsEnd; continue; } - OperationType opType = getOperationType(token); + if (isListAccessor(tokenIter)) { + Node::Ptr listAccessorNode = std::make_shared(NodeType::ListAccessor); + auto node = ParserContext::pushChildNode(listAccessorNode, NodeType::VariableName, token.ref); + node->value = token.id(); + auto exprBegin = std::next(tokenIter); + auto it = exprBegin; + unsigned nestingLevel = 0; + do { + if (it->is(Operator::RectRightBrace)) + nestingLevel--; + else if (it->is(Operator::RectLeftBrace)) + nestingLevel++; + it++; + } while (nestingLevel > 0); + std::stack argPostfixForm = generatePostfixForm(exprBegin, it, errors); + auto exprNode = ParserContext::pushChildNode(listAccessorNode, NodeType::Expression, token.ref); + buildExpressionSubtree(argPostfixForm, exprNode, errors); + postfixForm.emplace(listAccessorNode); + tokenIter = std::prev(it); + continue; + } ExpressionTokenType expType = getExpressionTokenType(token); if (expType == ExpressionTokenType::Operand) { - postfixForm.push(tokenIter); + postfixForm.emplace(tokenIter); } else if (expType == ExpressionTokenType::OpeningBrace) { - operations.push(tokenIter); + operations.emplace(tokenIter); } else if (expType == ExpressionTokenType::ClosingBrace) { bool foundBrace = false; while (!operations.empty()) { if (getExpressionTokenType(*operations.top()) != ExpressionTokenType::OpeningBrace) { - postfixForm.push(operations.top()); + postfixForm.emplace(operations.top()); operations.pop(); } else { foundBrace = true; @@ -313,28 +345,28 @@ std::stack generatePostfixForm(TokenIterator tokenIterBegin, Toke operations.pop(); // remove opening brace } else if (expType == ExpressionTokenType::Operation) { if (operations.empty() || getOperationPriority(token) < getOperationPriority(*operations.top())) { - operations.push(tokenIter); + operations.emplace(tokenIter); } else { while (!operations.empty() && getOperationPriority(*operations.top()) <= getOperationPriority(token)) { - postfixForm.push(operations.top()); + postfixForm.emplace(operations.top()); operations.pop(); } - operations.push(tokenIter); + operations.emplace(tokenIter); } + } else if (expType == ExpressionTokenType::RectBrace) { + continue; } else { errors.push(token, "Unexpected token inside an expression"); } } while (!operations.empty()) { - postfixForm.push(operations.top()); + postfixForm.emplace(operations.top()); operations.pop(); } return postfixForm; } -} // namespace - -static void parseBranchRoot(ParserContext &ctx) { +void parseBranchRoot(ParserContext &ctx) { while (ctx.nestingLevel > 0) { if (ctx.tokenIter == ctx.tokenEnd) return; @@ -353,7 +385,7 @@ static void parseBranchRoot(ParserContext &ctx) { " indentation(s) expected, " + std::to_string(currNestingLevel) + " indentation(s) given"); } else if (currNestingLevel < ctx.nestingLevel) { ctx.goParentNode(); - while (ctx.node->type != ast::NodeType::BranchRoot) { + while (ctx.node->type != NodeType::BranchRoot) { if (!ctx.node->parent) break; ctx.goParentNode(); @@ -365,57 +397,56 @@ static void parseBranchRoot(ParserContext &ctx) { const Token &currToken = ctx.token(); if (currToken.is(Keyword::If)) { - ctx.node = ctx.pushChildNode(ast::NodeType::IfStatement); + ctx.node = ctx.pushChildNode(NodeType::IfStatement); } else if (currToken.is(Keyword::While)) { - ctx.node = ctx.pushChildNode(ast::NodeType::WhileStatement); + ctx.node = ctx.pushChildNode(NodeType::WhileStatement); } else if (isVariableDeclaration(ctx.tokenIter, ctx.tokenEnd)) { - ctx.node = ctx.pushChildNode(ast::NodeType::VariableDeclaration); + ctx.node = ctx.pushChildNode(NodeType::VariableDeclaration); } else if (currToken.is(Keyword::Elif) || currToken.is(Keyword::Else)) { auto lastNode = ctx.node->children.back(); - if (lastNode->type == ast::NodeType::IfStatement) { - auto nodeType = - currToken.is(Keyword::Elif) ? ast::NodeType::ElifStatement : ast::NodeType::ElseStatement; + if (lastNode->type == NodeType::IfStatement) { + auto nodeType = currToken.is(Keyword::Elif) ? NodeType::ElifStatement : NodeType::ElseStatement; ctx.node = ParserContext::pushChildNode(lastNode, nodeType, currToken.ref); } else { ctx.pushError((currToken.is(Keyword::Elif) ? std::string("elif") : std::string("else")) + " is not allowed here"); } } else if (currToken.is(Keyword::Return)) { - ctx.node = ctx.pushChildNode(ast::NodeType::ReturnStatement); + ctx.node = ctx.pushChildNode(NodeType::ReturnStatement); } else { - ctx.node = ctx.pushChildNode(ast::NodeType::Expression); + ctx.node = ctx.pushChildNode(NodeType::Expression); } ctx.propagate(); } } -static void parseElifStatement(ParserContext &ctx) { +void parseElifStatement(ParserContext &ctx) { assert(ctx.tokenIter->is(Keyword::Elif)); ctx.goNextToken(); - ctx.node = ctx.pushChildNode(ast::NodeType::Expression); + ctx.node = ctx.pushChildNode(NodeType::Expression); ctx.propagate(); if (!ctx.token().is(Special::Colon)) { ctx.pushError("Colon expected here"); ctx.goNextExpression(); } - ctx.node = ctx.pushChildNode(ast::NodeType::BranchRoot); + ctx.node = ctx.pushChildNode(NodeType::BranchRoot); ctx.nestingLevel++; ctx.propagate(); } -static void parseElseStatement(ParserContext &ctx) { +void parseElseStatement(ParserContext &ctx) { assert(ctx.tokenIter->is(Keyword::Else)); ctx.goNextToken(); if (!ctx.token().is(Special::Colon)) { ctx.pushError("Colon expected here"); ctx.goNextExpression(); } - ctx.node = ctx.pushChildNode(ast::NodeType::BranchRoot); + ctx.node = ctx.pushChildNode(NodeType::BranchRoot); ctx.nestingLevel++; ctx.propagate(); } -static void parseExpression(ParserContext &ctx) { +void parseExpression(ParserContext &ctx) { auto it = ctx.tokenIter; while (!it->is(Special::Colon) && !it->is(Special::EndOfExpression)) it++; @@ -427,7 +458,7 @@ static void parseExpression(ParserContext &ctx) { ctx.goParentNode(); } -static void parseFunctionArguments(ParserContext &ctx) { +void parseFunctionArguments(ParserContext &ctx) { assert(ctx.token().is(Operator::LeftBrace)); ctx.goNextToken(); while (!ctx.token().is(Operator::RightBrace)) { @@ -440,10 +471,10 @@ static void parseFunctionArguments(ParserContext &ctx) { ctx.goNextToken(); break; } - auto node = ctx.pushChildNode(ast::NodeType::FunctionArgument); - auto argTypeNode = ParserContext::pushChildNode(node, ast::NodeType::TypeName, argType.ref); + auto node = ctx.pushChildNode(NodeType::FunctionArgument); + auto argTypeNode = ParserContext::pushChildNode(node, NodeType::TypeName, argType.ref); argTypeNode->value = TypeRegistry::typeId(argType); - auto argNameNode = ParserContext::pushChildNode(node, ast::NodeType::VariableName, argName.ref); + auto argNameNode = ParserContext::pushChildNode(node, NodeType::VariableName, argName.ref); argNameNode->value = argName.id(); const Token &last = *std::next(ctx.tokenIter, 3); @@ -456,18 +487,18 @@ static void parseFunctionArguments(ParserContext &ctx) { ctx.goNextToken(); } -static void parseFunctionDefinition(ParserContext &ctx) { +void parseFunctionDefinition(ParserContext &ctx) { assert(ctx.tokenIter->is(Keyword::Definition)); ctx.goNextToken(); if (ctx.token().type != TokenType::Identifier) { ctx.pushError("Given token is not allowed here in function definition"); } - ctx.pushChildNode(ast::NodeType::FunctionName)->value = ctx.token().id(); + ctx.pushChildNode(NodeType::FunctionName)->value = ctx.token().id(); ctx.goNextToken(); if (!ctx.token().is(Operator::LeftBrace)) { ctx.pushError("Given token is not allowed here in function definition"); } - ctx.node = ctx.pushChildNode(ast::NodeType::FunctionArguments); + ctx.node = ctx.pushChildNode(NodeType::FunctionArguments); ctx.propagate(); if (!ctx.token().is(Special::Arrow)) { ctx.pushError("Function return type is mandatory in its header"); @@ -476,35 +507,35 @@ static void parseFunctionDefinition(ParserContext &ctx) { if (!TypeRegistry::isTypename(ctx.token())) { ctx.pushError("Type name not found"); } - ctx.pushChildNode(ast::NodeType::FunctionReturnType)->value = TypeRegistry::typeId(ctx.token()); + ctx.pushChildNode(NodeType::FunctionReturnType)->value = TypeRegistry::typeId(ctx.token()); ctx.goNextToken(); if (!ctx.token().is(Special::Colon)) { ctx.pushError("Colon expected at the end of function header"); } ctx.goNextToken(); - ctx.node = ctx.pushChildNode(ast::NodeType::BranchRoot); + ctx.node = ctx.pushChildNode(NodeType::BranchRoot); ctx.nestingLevel = 1; ctx.propagate(); } -static void parseIfStatement(ParserContext &ctx) { +void parseIfStatement(ParserContext &ctx) { assert(ctx.tokenIter->is(Keyword::If)); ctx.goNextToken(); - ctx.node = ctx.pushChildNode(ast::NodeType::Expression); + ctx.node = ctx.pushChildNode(NodeType::Expression); ctx.propagate(); if (!ctx.token().is(Special::Colon)) { ctx.pushError("Colon expected here"); ctx.goNextExpression(); } - ctx.node = ctx.pushChildNode(ast::NodeType::BranchRoot); + ctx.node = ctx.pushChildNode(NodeType::BranchRoot); ctx.nestingLevel++; ctx.propagate(); } -static void parseProgramRoot(ParserContext &ctx) { +void parseProgramRoot(ParserContext &ctx) { while (ctx.tokenIter != ctx.tokenEnd) { if (ctx.token().is(Keyword::Definition)) { - ctx.node = ctx.pushChildNode(ast::NodeType::FunctionDefinition); + ctx.node = ctx.pushChildNode(NodeType::FunctionDefinition); ctx.propagate(); } else { ctx.pushError("Function definition was expected"); @@ -513,7 +544,7 @@ static void parseProgramRoot(ParserContext &ctx) { } } -static void parseReturnStatement(ParserContext &ctx) { +void parseReturnStatement(ParserContext &ctx) { assert(ctx.tokenIter->is(Keyword::Return)); ctx.goNextToken(); if (ctx.token().is(Special::EndOfExpression)) { @@ -529,31 +560,45 @@ static void parseReturnStatement(ParserContext &ctx) { ctx.goNextExpression(); return; } - ctx.node = ctx.pushChildNode(ast::NodeType::Expression); + ctx.node = ctx.pushChildNode(NodeType::Expression); ctx.propagate(); ctx.goParentNode(); } -static void parseVariableDeclaration(ParserContext &ctx) { +void parseVariableDeclaration(ParserContext &ctx) { ctx.goNextToken(); const Token &colon = ctx.token(); const Token &varName = *std::prev(ctx.tokenIter); - const Token &varType = *std::next(ctx.tokenIter); - - auto node = ctx.pushChildNode(ast::NodeType::TypeName); + const Token &varType = (std::advance(ctx.tokenIter, 1), ctx.token()); + auto node = ctx.pushChildNode(NodeType::TypeName); node->value = TypeRegistry::typeId(varType); - node = ctx.pushChildNode(ast::NodeType::VariableName); + bool isListType = varType.is(Keyword::List); + + if (isListType) { + const Token &leftBrace = (std::advance(ctx.tokenIter, 1), ctx.token()); + const Token &varTypeList = (std::advance(ctx.tokenIter, 1), ctx.token()); + const Token &rightBrace = (std::advance(ctx.tokenIter, 1), ctx.token()); + if (!leftBrace.is(Operator::RectLeftBrace) || !rightBrace.is(Operator::RectRightBrace)) { + ctx.pushError("Unexepted syntax for list declaration"); + } + auto listTypeNode = ParserContext::pushChildNode(node, NodeType::TypeName, ctx.tokenIter->ref); + listTypeNode->value = TypeRegistry::typeId(varTypeList); + } + node = ctx.pushChildNode(NodeType::VariableName); node->value = varName.id(); - auto endOfDecl = std::next(ctx.tokenIter, 2); + auto endOfDecl = std::next(ctx.tokenIter); if (endOfDecl->is(Special::EndOfExpression)) { // declaration without definition - std::advance(ctx.tokenIter, 3); + std::advance(ctx.tokenIter, 2); ctx.goParentNode(); } else if (endOfDecl->is(Operator::Assign)) { // declaration with definition - ctx.node = ctx.pushChildNode(ast::NodeType::Expression); - std::advance(ctx.tokenIter, 3); + ctx.node = ctx.pushChildNode(NodeType::Expression); + if (isListType) { + ctx.node = ctx.pushChildNode(NodeType::ListStatement); + } + std::advance(ctx.tokenIter, 2); ctx.propagate(); ctx.goParentNode(); } else { @@ -561,24 +606,48 @@ static void parseVariableDeclaration(ParserContext &ctx) { } } -static void parseWhileStatement(ParserContext &ctx) { +void parseWhileStatement(ParserContext &ctx) { assert(ctx.tokenIter->is(Keyword::While)); ctx.goNextToken(); - ctx.node = ctx.pushChildNode(ast::NodeType::Expression); + ctx.node = ctx.pushChildNode(NodeType::Expression); ctx.propagate(); if (!ctx.token().is(Special::Colon)) { ctx.pushError("Colon expected here"); ctx.goNextExpression(); } - ctx.node = ctx.pushChildNode(ast::NodeType::BranchRoot); + ctx.node = ctx.pushChildNode(NodeType::BranchRoot); ctx.nestingLevel++; ctx.propagate(); } +void parseListStatement(ParserContext &ctx) { + assert(ctx.tokenIter->is(Operator::RectLeftBrace)); + while (!ctx.token().is(Operator::RectRightBrace)) { + ctx.goNextToken(); + auto it = ctx.tokenIter; + + while (!it->is(Operator::Comma) && !it->is(Operator::RectRightBrace)) + it++; + const auto &tokenIterBegin = ctx.tokenIter; + const auto &tokenIterEnd = it; + if (tokenIterEnd->is(Special::EndOfExpression)) { + ctx.errors.push(*tokenIterEnd, "']' was expected"); + } + ctx.node = ctx.pushChildNode(NodeType::Expression); + std::stack postfixForm = generatePostfixForm(tokenIterBegin, tokenIterEnd, ctx.errors); + buildExpressionSubtree(postfixForm, ctx.node, ctx.errors); + ctx.tokenIter = tokenIterEnd; + ctx.goParentNode(); + } + ctx.goNextToken(); + ctx.goParentNode(); + ctx.goParentNode(); +} + // clang-format off -#define SUBPARSER(NodeTypeVal) {ast::NodeType::NodeTypeVal, parse##NodeTypeVal} +#define SUBPARSER(NodeTypeVal) {NodeType::NodeTypeVal, parse##NodeTypeVal} -static std::unordered_map> subparsers = { +static std::unordered_map> subparsers = { SUBPARSER(BranchRoot), SUBPARSER(ElifStatement), SUBPARSER(ElseStatement), @@ -590,9 +659,12 @@ static std::unordered_map> s SUBPARSER(ReturnStatement), SUBPARSER(VariableDeclaration), SUBPARSER(WhileStatement), + SUBPARSER(ListStatement), }; // clang-format on +} // namespace + SyntaxTree Parser::process(const TokenList &tokens) { SyntaxTree tree; tree.root = std::make_shared(NodeType::ProgramRoot); diff --git a/compiler/lib/frontend/parser/type_registry.cpp b/compiler/lib/frontend/parser/type_registry.cpp index 79cb2fa5..af2d7cf9 100644 --- a/compiler/lib/frontend/parser/type_registry.cpp +++ b/compiler/lib/frontend/parser/type_registry.cpp @@ -1,4 +1,6 @@ #include "parser/type_registry.hpp" +#include "compiler/ast/types.hpp" +#include "lexer/token_types.hpp" using ast::TypeId; using namespace lexer; @@ -26,6 +28,8 @@ ast::TypeId TypeRegistry::typeId(const Token &token) { return ast::BoolType; if (token.is(Keyword::Str)) return ast::StrType; + if (token.is(Keyword::List)) + return ast::ListType; if (token.is(Keyword::None)) return ast::NoneType; return ast::UnknownType; diff --git a/compiler/tests/frontend/lexer.cpp b/compiler/tests/frontend/lexer.cpp index 326c178a..de70e126 100644 --- a/compiler/tests/frontend/lexer.cpp +++ b/compiler/tests/frontend/lexer.cpp @@ -455,3 +455,50 @@ TEST(Lexer, raise_error_on_id_starting_with_special) { StringVec source = {"int @x"}; ASSERT_THROW(Lexer::process(source), ErrorBuffer); } + +TEST(Lexer, rect_brace_expression) { + StringVec source = {"[]"}; + TokenList transformed = Lexer::process(source); + TokenList expected; + expected.emplace_back(Operator::RectLeftBrace); + expected.emplace_back(Operator::RectRightBrace); + expected.emplace_back(Special::EndOfExpression); + ASSERT_EQ(expected, transformed); +} + +TEST(Lexer, rect_brace_expression_with_values) { + StringVec source = {"[1, 3.0, Z]"}; + TokenList transformed = Lexer::process(source); + TokenList expected; + expected.emplace_back(Operator::RectLeftBrace); + expected.emplace_back(TokenType::IntegerLiteral, "1"); + expected.emplace_back(Operator::Comma); + expected.emplace_back(TokenType::FloatingPointLiteral, "3.0"); + expected.emplace_back(Operator::Comma); + expected.emplace_back(TokenType::Identifier, "Z"); + expected.emplace_back(Operator::RectRightBrace); + expected.emplace_back(Special::EndOfExpression); + ASSERT_EQ(expected, transformed); +} + +TEST(Lexer, list_expression) { + StringVec source = {"mylist: list[int] = [1, 2, 3]"}; + TokenList transformed = Lexer::process(source); + TokenList expected; + expected.emplace_back(TokenType::Identifier, "mylist"); + expected.emplace_back(Special::Colon); + expected.emplace_back(Keyword::List); + expected.emplace_back(Operator::RectLeftBrace); + expected.emplace_back(Keyword::Int); + expected.emplace_back(Operator::RectRightBrace); + expected.emplace_back(Operator::Assign); + expected.emplace_back(Operator::RectLeftBrace); + expected.emplace_back(TokenType::IntegerLiteral, "1"); + expected.emplace_back(Operator::Comma); + expected.emplace_back(TokenType::IntegerLiteral, "2"); + expected.emplace_back(Operator::Comma); + expected.emplace_back(TokenType::IntegerLiteral, "3"); + expected.emplace_back(Operator::RectRightBrace); + expected.emplace_back(Special::EndOfExpression); + ASSERT_EQ(expected, transformed); +} diff --git a/compiler/tests/frontend/parser.cpp b/compiler/tests/frontend/parser.cpp index 425ec64c..590d9da0 100644 --- a/compiler/tests/frontend/parser.cpp +++ b/compiler/tests/frontend/parser.cpp @@ -491,3 +491,207 @@ TEST(Parser, can_parse_bool) { " BooleanLiteralValue: False\n"; ASSERT_EQ(expected, tree.dump()); } + +TEST(Parser, can_parse_list_declaration) { + StringVec source = { + "def main() -> None:", + " mylist : list[int]", + }; + TokenList tokens = Lexer::process(source); + SyntaxTree tree = Parser::process(tokens); + std::string expected = "ProgramRoot\n" + " FunctionDefinition\n" + " FunctionName: main\n" + " FunctionArguments\n" + " FunctionReturnType: NoneType\n" + " BranchRoot\n" + " VariableDeclaration\n" + " TypeName: ListType\n" + " TypeName: IntType\n" + " VariableName: mylist\n"; + ASSERT_EQ(expected, tree.dump()); +} + +TEST(Parser, can_parse_list_definition) { + StringVec source = { + "def main() -> None:", + " mylist : list[int] = [1 + 1, 1 + 2, 2 + 3]", + }; + TokenList tokens = Lexer::process(source); + SyntaxTree tree = Parser::process(tokens); + std::string expected = "ProgramRoot\n" + " FunctionDefinition\n" + " FunctionName: main\n" + " FunctionArguments\n" + " FunctionReturnType: NoneType\n" + " BranchRoot\n" + " VariableDeclaration\n" + " TypeName: ListType\n" + " TypeName: IntType\n" + " VariableName: mylist\n" + " Expression\n" + " ListStatement\n" + " Expression\n" + " BinaryOperation: Add\n" + " IntegerLiteralValue: 1\n" + " IntegerLiteralValue: 1\n" + " Expression\n" + " BinaryOperation: Add\n" + " IntegerLiteralValue: 1\n" + " IntegerLiteralValue: 2\n" + " Expression\n" + " BinaryOperation: Add\n" + " IntegerLiteralValue: 2\n" + " IntegerLiteralValue: 3\n"; + ASSERT_EQ(expected, tree.dump()); +} + +TEST(Parser, can_parse_list_access) { + StringVec source = { + "def main() -> None:", + " mylist : list[int] = [1, 2, 3]", + " x : int = 1 + mylist[0] - (mylist[1 + 2 * 3] * 2 * mylist[2])", + }; + // + TokenList tokens = Lexer::process(source); + SyntaxTree tree = Parser::process(tokens); + std::string expected = "ProgramRoot\n" + " FunctionDefinition\n" + " FunctionName: main\n" + " FunctionArguments\n" + " FunctionReturnType: NoneType\n" + " BranchRoot\n" + " VariableDeclaration\n" + " TypeName: ListType\n" + " TypeName: IntType\n" + " VariableName: mylist\n" + " Expression\n" + " ListStatement\n" + " Expression\n" + " IntegerLiteralValue: 1\n" + " Expression\n" + " IntegerLiteralValue: 2\n" + " Expression\n" + " IntegerLiteralValue: 3\n" + " VariableDeclaration\n" + " TypeName: IntType\n" + " VariableName: x\n" + " Expression\n" + " BinaryOperation: Sub\n" + " BinaryOperation: Add\n" + " IntegerLiteralValue: 1\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " IntegerLiteralValue: 0\n" + " BinaryOperation: Mult\n" + " BinaryOperation: Mult\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " BinaryOperation: Add\n" + " IntegerLiteralValue: 1\n" + " BinaryOperation: Mult\n" + " IntegerLiteralValue: 2\n" + " IntegerLiteralValue: 3\n" + " IntegerLiteralValue: 2\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " IntegerLiteralValue: 2\n"; + ASSERT_EQ(expected, tree.dump()); +} + +TEST(Parser, can_parse_complex_nested_list) { + StringVec source = { + "def main() -> None:", + " mylist : list[int] = [1, 2, 3]", + " x : int = mylist[mylist[1]]", + " y : int = mylist[mylist[1] + mylist[mylist[mylist[1]]]]", + }; + TokenList tokens = Lexer::process(source); + SyntaxTree tree = Parser::process(tokens); + std::string expected = "ProgramRoot\n" + " FunctionDefinition\n" + " FunctionName: main\n" + " FunctionArguments\n" + " FunctionReturnType: NoneType\n" + " BranchRoot\n" + " VariableDeclaration\n" + " TypeName: ListType\n" + " TypeName: IntType\n" + " VariableName: mylist\n" + " Expression\n" + " ListStatement\n" + " Expression\n" + " IntegerLiteralValue: 1\n" + " Expression\n" + " IntegerLiteralValue: 2\n" + " Expression\n" + " IntegerLiteralValue: 3\n" + " VariableDeclaration\n" + " TypeName: IntType\n" + " VariableName: x\n" + " Expression\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " IntegerLiteralValue: 1\n" + " VariableDeclaration\n" + " TypeName: IntType\n" + " VariableName: y\n" + " Expression\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " BinaryOperation: Add\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " IntegerLiteralValue: 1\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " IntegerLiteralValue: 1\n"; + ASSERT_EQ(expected, tree.dump()); +} + +TEST(Parser, can_parse_list_access_with_nested_function_call) { + StringVec source = { + "def main() -> None:", + " x = mylist[foo(bar(y))]", + }; + TokenList tokens = Lexer::process(source); + SyntaxTree tree = Parser::process(tokens); + std::string expected = "ProgramRoot\n" + " FunctionDefinition\n" + " FunctionName: main\n" + " FunctionArguments\n" + " FunctionReturnType: NoneType\n" + " BranchRoot\n" + " Expression\n" + " BinaryOperation: Assign\n" + " VariableName: x\n" + " ListAccessor\n" + " VariableName: mylist\n" + " Expression\n" + " FunctionCall\n" + " FunctionName: foo\n" + " FunctionArguments\n" + " Expression\n" + " FunctionCall\n" + " FunctionName: bar\n" + " FunctionArguments\n" + " Expression\n" + " VariableName: y\n"; + ASSERT_EQ(expected, tree.dump()); +}