Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support list syntax in lexer and parser #127

Merged
merged 14 commits into from
May 4, 2024
1 change: 1 addition & 0 deletions compiler/include/compiler/ast/node_type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ enum class NodeType {
VariableDeclaration,
VariableName,
WhileStatement,
ListStatement,
};

} // namespace ast
1 change: 1 addition & 0 deletions compiler/include/compiler/ast/types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ enum BuiltInTypes : TypeId {
StrType = 3,
NoneType = 4,
BoolType = 5,
ListType = 6,
BuiltInTypesCount,
};

Expand Down
1 change: 1 addition & 0 deletions compiler/include/compiler/frontend/lexer/token_types.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ enum class Keyword {
True,
None,
False,
List
vla5924 marked this conversation as resolved.
Show resolved Hide resolved
};

enum class Operator {
Expand Down
5 changes: 5 additions & 0 deletions compiler/lib/ast/node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,8 @@
return "BoolType";
case StrType:
return "StrType";
case ListType:
Fixed Show fixed Hide fixed
return "ListType";
case NoneType:
return "NoneType";
}
Expand Down Expand Up @@ -189,6 +191,9 @@
case NodeType::WhileStatement:
stream << "WhileStatement\n";
break;
case NodeType::ListStatement:
stream << "ListStatement\n";
break;
default:
stream << "Unknown\n";
}
Expand Down
1 change: 1 addition & 0 deletions compiler/lib/frontend/lexer/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ std::unordered_map<std::string_view, Keyword> keywords = {
{"or", Keyword::Or}, {"and", Keyword::And},
{"not", Keyword::Not}, {"in", Keyword::In},
{"True", Keyword::True}, {"None", Keyword::None},
{"list", Keyword::List},
};

std::unordered_map<std::string_view, Operator> operators = {
Expand Down
49 changes: 44 additions & 5 deletions compiler/lib/frontend/parser/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -538,22 +538,36 @@
ctx.goNextToken();
const Token &colon = ctx.token();
const Token &varName = *std::prev(ctx.tokenIter);
const Token &varType = *std::next(ctx.tokenIter);

const Token &varType = (std::advance(ctx.tokenIter, 1), ctx.token());
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed
auto node = ctx.pushChildNode(ast::NodeType::TypeName);
node->value = TypeRegistry::typeId(varType);
bool isListType = varType.is(Keyword::List);

if (isListType) {
const Token &leftBrace = (std::advance(ctx.tokenIter, 1), ctx.token());
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed
const Token &varTypeList = (std::advance(ctx.tokenIter, 1), ctx.token());
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed
const Token &rightBrace = (std::advance(ctx.tokenIter, 1), ctx.token());
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed
if (!leftBrace.is(Operator::RectLeftBrace) || !rightBrace.is(Operator::RectRightBrace)) {
ctx.pushError("Unexepted syntax for list declaration");
}
auto listTypeNode = ParserContext::pushChildNode(node, ast::NodeType::TypeName, ctx.tokenIter->ref);
listTypeNode->value = TypeRegistry::typeId(varTypeList);
}
node = ctx.pushChildNode(ast::NodeType::VariableName);
node->value = varName.id();

auto endOfDecl = std::next(ctx.tokenIter, 2);
auto endOfDecl = std::next(ctx.tokenIter);
Fixed Show fixed Hide fixed
if (endOfDecl->is(Special::EndOfExpression)) {
// declaration without definition
std::advance(ctx.tokenIter, 3);
std::advance(ctx.tokenIter, 2);
Fixed Show fixed Hide fixed
ctx.goParentNode();
} else if (endOfDecl->is(Operator::Assign)) {
// declaration with definition
ctx.node = ctx.pushChildNode(ast::NodeType::Expression);
std::advance(ctx.tokenIter, 3);
if (isListType) {
ctx.node = ctx.pushChildNode(ast::NodeType::ListStatement);
}
std::advance(ctx.tokenIter, 2);
Fixed Show fixed Hide fixed
ctx.propagate();
ctx.goParentNode();
} else {
Expand All @@ -575,6 +589,30 @@
ctx.propagate();
}

/// Parses a list literal of the form `[expr, expr, ...]`.
///
/// On entry the current token must be the opening `[`. Every element is delimited by
/// the next `,` or `]`, gets its own Expression child node under the current node, and
/// is built with generatePostfixForm / buildExpressionSubtree. On success the token
/// iterator ends up just past the closing `]`.
///
/// If the end of the expression is reached before a closing `]`, a ParserError
/// ("']' was expected") is recorded, the token iterator is left on the
/// end-of-expression token and the remaining elements are not parsed.
///
/// Limitation: the element scan is flat, so a `,` or `]` nested inside an element is
/// treated as a delimiter of the outer list.
static void parseListStatement(ParserContext &ctx) {
    assert(ctx.tokenIter->is(Operator::RectLeftBrace));
    bool closed = true;
    while (!ctx.token().is(Operator::RectRightBrace)) {
        // Step over the opening '[' (first pass) or the ',' that ended the previous element.
        ctx.goNextToken();

        // Find the end of the current element. The scan is also bounded by the
        // end-of-expression token so that a missing ']' cannot run the iterator
        // past the end of the token stream.
        auto elementEnd = ctx.tokenIter;
        while (!elementEnd->is(Operator::Comma) && !elementEnd->is(Operator::RectRightBrace) &&
               !elementEnd->is(Special::EndOfExpression)) {
            ++elementEnd;
        }

        if (elementEnd->is(Special::EndOfExpression)) {
            ctx.errors.push<ParserError>(*elementEnd, "']' was expected");
            // Stop here: there is no closing brace to look for any more.
            ctx.tokenIter = elementEnd;
            closed = false;
            break;
        }

        ctx.node = ctx.pushChildNode(ast::NodeType::Expression);
        std::stack<SubExpression> postfixForm = generatePostfixForm(ctx.tokenIter, elementEnd, ctx.errors);
        buildExpressionSubtree(postfixForm, ctx.node, ctx.errors);
        ctx.tokenIter = elementEnd;
        ctx.goParentNode();
    }
    if (closed) {
        // Consume the closing ']'.
        ctx.goNextToken();
    }
    // Climb two levels up the tree — presumably out of the ListStatement node and the
    // Expression node wrapped around it by the variable-declaration parser (confirm
    // against the caller).
    ctx.goParentNode();
    ctx.goParentNode();
}

// clang-format off
#define SUBPARSER(NodeTypeVal) {ast::NodeType::NodeTypeVal, parse##NodeTypeVal}

Expand All @@ -590,6 +628,7 @@
SUBPARSER(ReturnStatement),
SUBPARSER(VariableDeclaration),
SUBPARSER(WhileStatement),
SUBPARSER(ListStatement),
};
// clang-format on

Expand Down
2 changes: 2 additions & 0 deletions compiler/lib/frontend/parser/type_registry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
return ast::BoolType;
if (token.is(Keyword::Str))
return ast::StrType;
if (token.is(Keyword::List))
Fixed Show fixed Hide fixed
Fixed Show fixed Hide fixed
return ast::ListType;
Fixed Show fixed Hide fixed
if (token.is(Keyword::None))
return ast::NoneType;
return ast::UnknownType;
Expand Down
47 changes: 47 additions & 0 deletions compiler/tests/frontend/lexer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,3 +455,50 @@ TEST(Lexer, raise_error_on_id_starting_with_special) {
StringVec source = {"int @x"};
ASSERT_THROW(Lexer::process(source), ErrorBuffer);
}

// An empty bracket pair must lex to '[' and ']' followed by the end-of-expression token.
TEST(Lexer, rect_brace_expression) {
    // Reference token sequence.
    TokenList expected;
    expected.emplace_back(Operator::RectLeftBrace);
    expected.emplace_back(Operator::RectRightBrace);
    expected.emplace_back(Special::EndOfExpression);

    // Sequence actually produced by the lexer.
    StringVec lines = {"[]"};
    TokenList transformed = Lexer::process(lines);

    ASSERT_EQ(expected, transformed);
}

// A bracketed, comma-separated sequence of an integer literal, a floating point literal
// and an identifier must lex to the matching tokens between '[' and ']'.
TEST(Lexer, rect_brace_expression_with_values) {
    // Reference token sequence.
    TokenList expected;
    expected.emplace_back(Operator::RectLeftBrace);
    expected.emplace_back(TokenType::IntegerLiteral, "1");
    expected.emplace_back(Operator::Comma);
    expected.emplace_back(TokenType::FloatingPointLiteral, "3.0");
    expected.emplace_back(Operator::Comma);
    expected.emplace_back(TokenType::Identifier, "Z");
    expected.emplace_back(Operator::RectRightBrace);
    expected.emplace_back(Special::EndOfExpression);

    // Sequence actually produced by the lexer.
    StringVec lines = {"[1, 3.0, Z]"};
    TokenList transformed = Lexer::process(lines);

    ASSERT_EQ(expected, transformed);
}

// A complete list declaration with an initializer must lex to:
// identifier, ':', the `list` keyword with its bracketed element type, '=',
// and the bracketed literal values, terminated by the end-of-expression token.
TEST(Lexer, list_expression) {
    TokenList expected;

    // mylist : list[int]
    expected.emplace_back(TokenType::Identifier, "mylist");
    expected.emplace_back(Special::Colon);
    expected.emplace_back(Keyword::List);
    expected.emplace_back(Operator::RectLeftBrace);
    expected.emplace_back(Keyword::Int);
    expected.emplace_back(Operator::RectRightBrace);

    // = [1, 2, 3]
    expected.emplace_back(Operator::Assign);
    expected.emplace_back(Operator::RectLeftBrace);
    expected.emplace_back(TokenType::IntegerLiteral, "1");
    expected.emplace_back(Operator::Comma);
    expected.emplace_back(TokenType::IntegerLiteral, "2");
    expected.emplace_back(Operator::Comma);
    expected.emplace_back(TokenType::IntegerLiteral, "3");
    expected.emplace_back(Operator::RectRightBrace);
    expected.emplace_back(Special::EndOfExpression);

    StringVec lines = {"mylist: list[int] = [1, 2, 3]"};
    TokenList transformed = Lexer::process(lines);

    ASSERT_EQ(expected, transformed);
}
54 changes: 54 additions & 0 deletions compiler/tests/frontend/parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -491,3 +491,57 @@ TEST(Parser, can_parse_bool) {
" BooleanLiteralValue: False\n";
ASSERT_EQ(expected, tree.dump());
}

// A list declaration without an initializer must produce a VariableDeclaration whose
// ListType node carries the element type as a nested TypeName node.
// NOTE(review): leading whitespace inside the string literals below is significant;
// confirm it against the repository copy of this test.
TEST(Parser, can_parse_list) {
    const std::string expected = "ProgramRoot\n"
                                 " FunctionDefinition\n"
                                 " FunctionName: main\n"
                                 " FunctionArguments\n"
                                 " FunctionReturnType: NoneType\n"
                                 " BranchRoot\n"
                                 " VariableDeclaration\n"
                                 " TypeName: ListType\n"
                                 " TypeName: IntType\n"
                                 " VariableName: mylist\n";

    StringVec program = {
        "def main() -> None:",
        " mylist : list[int]",
    };
    TokenList tokens = Lexer::process(program);
    SyntaxTree tree = Parser::process(tokens);

    ASSERT_EQ(expected, tree.dump());
}

// A list declaration with an initializer must produce, after the type and name nodes,
// an Expression holding a ListStatement with one Expression subtree per list element.
// NOTE(review): leading whitespace inside the string literals below is significant;
// confirm it against the repository copy of this test.
TEST(Parser, can_parse_list_defenition) {
    const std::string expected = "ProgramRoot\n"
                                 " FunctionDefinition\n"
                                 " FunctionName: main\n"
                                 " FunctionArguments\n"
                                 " FunctionReturnType: NoneType\n"
                                 " BranchRoot\n"
                                 " VariableDeclaration\n"
                                 " TypeName: ListType\n"
                                 " TypeName: IntType\n"
                                 " VariableName: mylist\n"
                                 " Expression\n"
                                 " ListStatement\n"
                                 // first element: 1 + 1
                                 " Expression\n"
                                 " BinaryOperation: Add\n"
                                 " IntegerLiteralValue: 1\n"
                                 " IntegerLiteralValue: 1\n"
                                 // second element: 1 + 2
                                 " Expression\n"
                                 " BinaryOperation: Add\n"
                                 " IntegerLiteralValue: 1\n"
                                 " IntegerLiteralValue: 2\n"
                                 // third element: 2 + 3
                                 " Expression\n"
                                 " BinaryOperation: Add\n"
                                 " IntegerLiteralValue: 2\n"
                                 " IntegerLiteralValue: 3\n";

    StringVec program = {
        "def main() -> None:",
        " mylist : list[int] = [1 + 1, 1 + 2, 2 + 3]",
    };
    TokenList tokens = Lexer::process(program);
    SyntaxTree tree = Parser::process(tokens);

    ASSERT_EQ(expected, tree.dump());
}
Loading