From ee930f3ec404fcb30b676385edf018470cb4cb70 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 4 Aug 2024 14:38:57 -0400 Subject: [PATCH 01/38] feat: generate a function table and initialize with module functions --- src/compiler/CodeGen.cpp | 35 +++++++++++++++++++++++++++++++++++ src/compiler/CodeGen.hpp | 6 +++++- 2 files changed, 40 insertions(+), 1 deletion(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index d039fca..5f244ee 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -18,10 +18,14 @@ namespace Theta { BinaryenModuleSetFeatures(module, BinaryenFeatureStrings()); + prepareFunctionTable(module); + StandardLibrary::registerFunctions(module); generate(ast, module); + registerModuleFunctions(module); + BinaryenModuleAutoDrop(module); return module; @@ -124,6 +128,8 @@ namespace Theta { ); } + + // TODO: Functions will be defined as closures which take in the scope of the surrounding block as additional parameters throw new runtime_error("Lambda functions are not yet implemented."); } @@ -182,6 +188,8 @@ namespace Theta { generate(fnDeclNode->getDefinition(), module) ); + fnNamesToAddToTable.push_back(functionName); + if (addToExports) { BinaryenAddFunctionExport(module, functionName.c_str(), functionName.c_str()); } @@ -456,4 +464,31 @@ namespace Theta { scope.insert(identifier, ast->getRight()); } + + void CodeGen::prepareFunctionTable(BinaryenModuleRef &module) { + BinaryenAddTable( + module, + FN_TABLE_NAME.c_str(), + 10, + 1000, + BinaryenTypeFuncref() + ); + } + + void CodeGen::registerModuleFunctions(BinaryenModuleRef &module) { + const char** fnNames = new const char*[fnNamesToAddToTable.size()]; + + for (int i = 0; i < fnNamesToAddToTable.size(); i++) { + fnNames[i] = fnNamesToAddToTable.at(i).c_str(); + } + + BinaryenAddActiveElementSegment( + module, + FN_TABLE_NAME.c_str(), + "0", + fnNames, + fnNamesToAddToTable.size(), + BinaryenConst(module, BinaryenLiteralInt32(0)) + ); + } } diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 30fe2d9..a61e7b6 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -43,8 +43,10 @@ namespace Theta { private: SymbolTableStack scope; - + string FN_TABLE_NAME = "0"; + vector fnNamesToAddToTable; string LOCAL_IDX_SCOPE_KEY = "ThetaLang.internal.localIdxCounter"; + string BOOTSTRAP_FUNC_NAME = "ThetaLang.bootstrap"; BinaryenExpressionRef generateStringBinaryOperation(string op, BinaryenExpressionRef left, BinaryenExpressionRef right, BinaryenModuleRef &module); @@ -53,5 +55,7 @@ namespace Theta { void hoistCapsuleElements(vector> elements); void bindIdentifierToScope(shared_ptr ast); + void prepareFunctionTable(BinaryenModuleRef &module); + void registerModuleFunctions(BinaryenModuleRef &module); }; } From 55ff81f57b2ef3aff2a4edf4abbccdbcd093b96b Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 4 Aug 2024 21:07:50 -0400 Subject: [PATCH 02/38] add parent reference to each astnode to allow for upwards traversal if necessary --- src/compiler/CodeGen.cpp | 25 +- src/compiler/CodeGen.hpp | 5 +- src/compiler/TypeChecker.cpp | 64 ++-- src/compiler/TypeChecker.hpp | 4 +- src/compiler/WasmClosure.hpp | 30 ++ .../optimization/LiteralInlinerPass.cpp | 6 +- src/parser/Parser.cpp | 287 ++++++++++-------- src/parser/ast/ASTNode.hpp | 6 +- src/parser/ast/ASTNodeList.hpp | 3 +- src/parser/ast/AssignmentNode.hpp | 3 +- src/parser/ast/BinaryOperationNode.hpp | 3 +- src/parser/ast/BlockNode.hpp | 4 +- src/parser/ast/CapsuleNode.hpp | 3 +- src/parser/ast/ControlFlowNode.hpp | 2 +- src/parser/ast/DictionaryNode.hpp | 4 +- src/parser/ast/EnumNode.hpp | 2 +- src/parser/ast/FunctionDeclarationNode.hpp | 3 +- src/parser/ast/FunctionInvocationNode.hpp | 2 +- src/parser/ast/IdentifierNode.hpp | 3 +- src/parser/ast/LinkNode.hpp | 3 +- src/parser/ast/ListNode.hpp | 5 +- src/parser/ast/LiteralNode.hpp | 4 +- src/parser/ast/ReturnNode.hpp | 3 +- src/parser/ast/SourceNode.hpp | 2 +- src/parser/ast/StructDeclarationNode.hpp | 3 +- src/parser/ast/StructDefinitionNode.hpp | 4 +- src/parser/ast/SymbolNode.hpp | 3 +- src/parser/ast/TupleNode.hpp | 3 +- src/parser/ast/TypeDeclarationNode.hpp | 2 +- src/parser/ast/UnaryOperationNode.hpp | 3 +- 30 files changed, 285 insertions(+), 209 deletions(-) create mode 100644 src/compiler/WasmClosure.hpp diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 5f244ee..655a1b2 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -18,8 +18,6 @@ namespace Theta { BinaryenModuleSetFeatures(module, BinaryenFeatureStrings()); - prepareFunctionTable(module); - StandardLibrary::registerFunctions(module); generate(ast, module); @@ -144,7 +142,7 @@ namespace Theta { BinaryenType parameterType = BinaryenTypeNone(); int totalParams = fnDeclNode->getParameters()->getElements().size(); - scope.insert(LOCAL_IDX_SCOPE_KEY, make_shared(ASTNode::NUMBER_LITERAL, to_string(totalParams))); + scope.insert(LOCAL_IDX_SCOPE_KEY, make_shared(ASTNode::NUMBER_LITERAL, to_string(totalParams), nullptr)); if (totalParams > 0) { BinaryenType* types = new BinaryenType[totalParams]; @@ -188,7 +186,10 @@ namespace Theta { generate(fnDeclNode->getDefinition(), module) ); - fnNamesToAddToTable.push_back(functionName); + functionNameToClosureMap.insert(make_pair( + functionName, + WasmClosure(functionNameToClosureMap.size(), totalParams) + )); if (addToExports) { BinaryenAddFunctionExport(module, functionName.c_str(), functionName.c_str()); @@ -465,21 +466,19 @@ namespace Theta { scope.insert(identifier, ast->getRight()); } - void CodeGen::prepareFunctionTable(BinaryenModuleRef &module) { + void CodeGen::registerModuleFunctions(BinaryenModuleRef &module) { BinaryenAddTable( module, FN_TABLE_NAME.c_str(), - 10, - 1000, + functionNameToClosureMap.size(), + functionNameToClosureMap.size(), BinaryenTypeFuncref() ); - } - void CodeGen::registerModuleFunctions(BinaryenModuleRef &module) { - const char** fnNames = new const char*[fnNamesToAddToTable.size()]; + const char** fnNames = new const char*[functionNameToClosureMap.size()]; - for (int i = 0; i < fnNamesToAddToTable.size(); i++) { - fnNames[i] = fnNamesToAddToTable.at(i).c_str(); + for (auto& [fnName, fnRef] : functionNameToClosureMap) { + fnNames[fnRef.getFunctionIndex()] = fnName.c_str(); } BinaryenAddActiveElementSegment( @@ -487,7 +486,7 @@ namespace Theta { FN_TABLE_NAME.c_str(), "0", fnNames, - fnNamesToAddToTable.size(), + functionNameToClosureMap.size(), BinaryenConst(module, BinaryenLiteralInt32(0)) ); } diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index a61e7b6..983b1d1 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -7,6 +7,7 @@ #include "../parser/ast/LiteralNode.hpp" #include "../parser/ast/SourceNode.hpp" #include "compiler/SymbolTableStack.hpp" +#include "compiler/WasmClosure.hpp" #include "parser/ast/ASTNodeList.hpp" #include "parser/ast/AssignmentNode.hpp" #include "parser/ast/CapsuleNode.hpp" @@ -17,6 +18,7 @@ #include "parser/ast/FunctionInvocationNode.hpp" #include "parser/ast/ControlFlowNode.hpp" #include +#include using namespace std; @@ -44,7 +46,7 @@ namespace Theta { private: SymbolTableStack scope; string FN_TABLE_NAME = "0"; - vector fnNamesToAddToTable; + unordered_map functionNameToClosureMap; string LOCAL_IDX_SCOPE_KEY = "ThetaLang.internal.localIdxCounter"; string BOOTSTRAP_FUNC_NAME = "ThetaLang.bootstrap"; @@ -55,7 +57,6 @@ namespace Theta { void hoistCapsuleElements(vector> elements); void bindIdentifierToScope(shared_ptr ast); - void prepareFunctionTable(BinaryenModuleRef &module); void registerModuleFunctions(BinaryenModuleRef &module); }; } diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index bdc93ad..2881dd8 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -75,19 +75,19 @@ namespace Theta { node->setResolvedType(node->getValue()->getResolvedType()); return true; } else if (node->getNodeType() == ASTNode::NUMBER_LITERAL) { - node->setResolvedType(make_shared(DataTypes::NUMBER)); + node->setResolvedType(make_shared(DataTypes::NUMBER, node)); return true; } else if (node->getNodeType() == ASTNode::STRING_LITERAL) { - node->setResolvedType(make_shared(DataTypes::STRING)); + node->setResolvedType(make_shared(DataTypes::STRING, node)); return true; } else if (node->getNodeType() == ASTNode::BOOLEAN_LITERAL) { - node->setResolvedType(make_shared(DataTypes::BOOLEAN)); + node->setResolvedType(make_shared(DataTypes::BOOLEAN, node)); return true; } else if (node->getNodeType() == ASTNode::CAPSULE) { - node->setResolvedType(make_shared(DataTypes::CAPSULE)); + node->setResolvedType(make_shared(DataTypes::CAPSULE, node)); return true; } else if (node->getNodeType() == ASTNode::SYMBOL) { - node->setResolvedType(make_shared(DataTypes::SYMBOL)); + node->setResolvedType(make_shared(DataTypes::SYMBOL, node)); return true; } else if (node->getNodeType() == ASTNode::TYPE_DECLARATION) { return checkTypeDeclarationNode(dynamic_pointer_cast(node)); @@ -216,7 +216,7 @@ namespace Theta { } if (isBooleanOperator(node->getOperator())) { - node->setResolvedType(make_shared(DataTypes::BOOLEAN)); + node->setResolvedType(make_shared(DataTypes::BOOLEAN, node)); } else { node->setResolvedType(node->getLeft()->getResolvedType()); } @@ -229,8 +229,8 @@ namespace Theta { if (!valid) return false; - shared_ptr boolType = make_shared(DataTypes::BOOLEAN); - shared_ptr numType = make_shared(DataTypes::NUMBER); + shared_ptr boolType = make_shared(DataTypes::BOOLEAN, nullptr); + shared_ptr numType = make_shared(DataTypes::NUMBER, nullptr); if (isSameType(node->getValue()->getResolvedType(), boolType) && node->getOperator() != Lexemes::NOT) { Compiler::getInstance().addException( @@ -279,7 +279,7 @@ namespace Theta { if (blockReturnTypes.size() == 1) { node->setResolvedType(blockReturnTypes[0]); } else if (blockReturnTypes.size() > 1) { - node->setResolvedType(makeVariadicType(blockReturnTypes)); + node->setResolvedType(makeVariadicType(blockReturnTypes, node)); } return true; @@ -307,7 +307,7 @@ namespace Theta { if (node->getResolvedType()) { node->getResolvedType()->setValue(node->getDefinition()->getResolvedType()); } else { - shared_ptr funcType = make_shared(DataTypes::FUNCTION); + shared_ptr funcType = make_shared(DataTypes::FUNCTION, node); funcType->setValue(node->getDefinition()->getResolvedType()); node->setResolvedType(funcType); @@ -349,11 +349,11 @@ namespace Theta { if (pair.first) { bool validCondition = checkAST(pair.first); - shared_ptr boolType = make_shared(DataTypes::BOOLEAN); + shared_ptr boolType = make_shared(DataTypes::BOOLEAN, nullptr); vector> typesThatCanBeInterpretedAsBooleans = { boolType, - make_shared(DataTypes::NUMBER) + make_shared(DataTypes::NUMBER, nullptr) }; if (!validCondition || !isOneOfTypes(pair.first->getResolvedType(), typesThatCanBeInterpretedAsBooleans)) { @@ -381,12 +381,12 @@ namespace Theta { // If we have an if without an else, thats fine, but that means we have a potential hole if we try to use this as // a return value to something (like assigning a variable to the result of a control flow). We can return nil as part // of the resolved type of the node, which will cause assignments without an else to fail (as they should) - if (!hasElseBlock) returnTypes.push_back(make_shared(DataTypes::NIL)); + if (!hasElseBlock) returnTypes.push_back(make_shared(DataTypes::NIL, node)); if (returnTypes.size() == 1) { node->setResolvedType(returnTypes[0]); } else { - node->setResolvedType(makeVariadicType(returnTypes)); + node->setResolvedType(makeVariadicType(returnTypes, node)); } return true; @@ -421,10 +421,10 @@ namespace Theta { return false; } - shared_ptr listType = make_shared(DataTypes::LIST); + shared_ptr listType = make_shared(DataTypes::LIST, node); if (returnTypes.size() == 0) { - listType->setValue(make_shared(DataTypes::UNKNOWN)); + listType->setValue(make_shared(DataTypes::UNKNOWN, listType)); } else { listType->setValue(returnTypes.at(0)); } @@ -440,7 +440,7 @@ namespace Theta { if (!validLeft || !validRight) return false; - shared_ptr type = make_shared(DataTypes::TUPLE); + shared_ptr type = make_shared(DataTypes::TUPLE, node); type->setLeft(node->getLeft()->getResolvedType()); type->setRight(node->getRight()->getResolvedType()); @@ -464,7 +464,7 @@ namespace Theta { if (!isKeyValid || !isValValid) return false; - shared_ptr symbolType = make_shared(DataTypes::SYMBOL); + shared_ptr symbolType = make_shared(DataTypes::SYMBOL, nullptr); if (!isSameType(kvTuple->getLeft()->getResolvedType(), symbolType)) { Compiler::getInstance().addException( @@ -492,10 +492,10 @@ namespace Theta { return false; } - shared_ptr dictType = make_shared(DataTypes::DICT); + shared_ptr dictType = make_shared(DataTypes::DICT, node); if (valueTypes.size() == 0) { - dictType->setValue(make_shared(DataTypes::UNKNOWN)); + dictType->setValue(make_shared(DataTypes::UNKNOWN, dictType)); } else { dictType->setValue(valueTypes.at(0)); } @@ -514,7 +514,7 @@ namespace Theta { if (!valid) return false; } - structNode->setResolvedType(make_shared(node->getName())); + structNode->setResolvedType(make_shared(node->getName(), structNode)); shared_ptr existingIdentifierInScope = identifierTable.lookup(node->getName()); @@ -597,7 +597,7 @@ namespace Theta { return false; } - node->setResolvedType(make_shared(node->getStructType())); + node->setResolvedType(make_shared(node->getStructType(), node)); return true; } @@ -638,7 +638,7 @@ namespace Theta { // Initially set the function resolvedType to whatever the identifier type is specified. This will get // updated later when we actually typecheck the function definition to whatever types the function actually returns. // This way, we support recursive function type resolution and cyclic function type resolution - node->getRight()->setResolvedType(deepCopyTypeDeclaration(dynamic_pointer_cast(ident->getValue()))); + node->getRight()->setResolvedType(deepCopyTypeDeclaration(dynamic_pointer_cast(ident->getValue()), node)); capsuleDeclarationsTable.insert(uniqueFuncIdentifier, node->getRight()); } @@ -653,7 +653,7 @@ namespace Theta { return; } - structNode->setResolvedType(make_shared(structNode->getName())); + structNode->setResolvedType(make_shared(structNode->getName(), structNode)); capsuleDeclarationsTable.insert(structNode->getName(), node); } @@ -786,8 +786,8 @@ namespace Theta { return find(BOOLEAN_OPERATORS.begin(), BOOLEAN_OPERATORS.end(), op) != BOOLEAN_OPERATORS.end(); } - shared_ptr TypeChecker::makeVariadicType(vector> types) { - shared_ptr variadicTypeNode = make_shared(DataTypes::VARIADIC); + shared_ptr TypeChecker::makeVariadicType(vector> types, shared_ptr parent) { + shared_ptr variadicTypeNode = make_shared(DataTypes::VARIADIC, parent); // The unique function requires a sorted vector sort(types.begin(), types.end(), [](const shared_ptr& a, const shared_ptr& b) { @@ -839,19 +839,19 @@ namespace Theta { return functionIdentifier; } - shared_ptr TypeChecker::deepCopyTypeDeclaration(shared_ptr original) { - shared_ptr copy = make_shared(original->getType()); + shared_ptr TypeChecker::deepCopyTypeDeclaration(shared_ptr original, shared_ptr parent) { + shared_ptr copy = make_shared(original->getType(), parent); if (original->getValue()) { - copy->setValue(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getValue()))); + copy->setValue(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getValue()), copy)); } else if (original->getLeft()) { - copy->setLeft(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getLeft()))); - copy->setRight(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getRight()))); + copy->setLeft(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getLeft()), copy)); + copy->setRight(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getRight()), copy)); } else if (original->getElements().size() > 0) { vector> copyChildren; for (int i = 0; i < original->getElements().size(); i++) { - copyChildren.push_back(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getElements().at(i)))); + copyChildren.push_back(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getElements().at(i)), copy)); } copy->setElements(copyChildren); diff --git a/src/compiler/TypeChecker.hpp b/src/compiler/TypeChecker.hpp index 0a043d4..534f66d 100644 --- a/src/compiler/TypeChecker.hpp +++ b/src/compiler/TypeChecker.hpp @@ -273,7 +273,7 @@ namespace Theta { * @param types The vector of type declaration nodes. * @return shared_ptr The created variadic type node. */ - static shared_ptr makeVariadicType(vector> types); + static shared_ptr makeVariadicType(vector> types, shared_ptr parent); /** * @brief Generates a unique function identifier based on the function's name and its parameters to handle overloading. @@ -291,6 +291,6 @@ namespace Theta { * @param original The original type declaration node to copy. * @return shared_ptr The deep-copied type declaration node. */ - static shared_ptr deepCopyTypeDeclaration(shared_ptr node); + static shared_ptr deepCopyTypeDeclaration(shared_ptr node, shared_ptr parent); }; } diff --git a/src/compiler/WasmClosure.hpp b/src/compiler/WasmClosure.hpp new file mode 100644 index 0000000..090154b --- /dev/null +++ b/src/compiler/WasmClosure.hpp @@ -0,0 +1,30 @@ +#pragma once + +namespace Theta { + class WasmClosure { + public: + WasmClosure(int tableIndex, int arity) { + idx = tableIndex; + arity = arity; + argPointers = new int[arity]; + } + + int getFunctionIndex() { return idx; } + + int getArity() { return arity; } + + int* getArgPointers() { return argPointers; } + + void addArg(int argPtr) { + argPointers[arity] = argPtr; + currentArgs++; + } + + + private: + int idx; + int arity; + int* argPointers; + int currentArgs = 0; + }; +} diff --git a/src/compiler/optimization/LiteralInlinerPass.cpp b/src/compiler/optimization/LiteralInlinerPass.cpp index c275103..ece749f 100644 --- a/src/compiler/optimization/LiteralInlinerPass.cpp +++ b/src/compiler/optimization/LiteralInlinerPass.cpp @@ -58,7 +58,7 @@ void LiteralInlinerPass::substituteIdentifiers(shared_ptr &ast) { shared_ptr literal = dynamic_pointer_cast(foundIdentifier); - ast = make_shared(literal->getNodeType(), literal->getLiteralValue()); + ast = make_shared(literal->getNodeType(), literal->getLiteralValue(), ast); } // When we have a variable assigned to a literal, we can safely just add that to the scope @@ -135,11 +135,11 @@ void LiteralInlinerPass::unpackEnumElementsInScope(shared_ptr node, Sym return; } - scope.insert(enumElIdentifier, make_shared(ASTNode::NUMBER_LITERAL, to_string(i))); + scope.insert(enumElIdentifier, make_shared(ASTNode::NUMBER_LITERAL, to_string(i), nullptr)); } // Insert the enum identifier itself into scope so we can remap types - scope.insert(baseIdentifier, make_shared(DataTypes::NUMBER)); + scope.insert(baseIdentifier, make_shared(DataTypes::NUMBER, nullptr)); } void LiteralInlinerPass::remapEnumTypeReferences(shared_ptr &ast) { diff --git a/src/parser/Parser.cpp b/src/parser/Parser.cpp index 6f997d0..53dbcc2 100644 --- a/src/parser/Parser.cpp +++ b/src/parser/Parser.cpp @@ -73,25 +73,25 @@ namespace Theta { shared_ptr parseSource() { vector> links; + shared_ptr sourceNode = make_shared(); while (match(Token::KEYWORD, Lexemes::LINK)) { - links.push_back(parseLink()); + links.push_back(parseLink(sourceNode)); } - shared_ptr sourceNode = make_shared(); sourceNode->setLinks(links); - sourceNode->setValue(parseCapsule()); + sourceNode->setValue(parseCapsule(sourceNode)); return sourceNode; } - shared_ptr parseLink() { + shared_ptr parseLink(shared_ptr parent) { match(Token::IDENTIFIER); shared_ptr linkNode = Theta::Compiler::getInstance().getIfExistsParsedLinkAST(currentToken.getLexeme()); if (linkNode) return linkNode; - linkNode = make_shared(currentToken.getLexeme()); + linkNode = make_shared(currentToken.getLexeme(), parent); auto fileContainingLinkedCapsule = filesByCapsule->find(currentToken.getLexeme()); @@ -116,35 +116,35 @@ namespace Theta { return linkNode; } - shared_ptr parseCapsule() { + shared_ptr parseCapsule(shared_ptr parent) { if (match(Token::KEYWORD, Lexemes::CAPSULE)) { match(Token::IDENTIFIER); - shared_ptr capsule = make_shared(currentToken.getLexeme()); - capsule->setValue(parseBlock()); + shared_ptr capsule = make_shared(currentToken.getLexeme(), parent); + capsule->setValue(parseBlock(capsule)); return capsule; } - return parseAssignment(); + return parseAssignment(parent); } - shared_ptr parseReturn() { + shared_ptr parseReturn(shared_ptr parent) { if (match(Token::KEYWORD, Lexemes::RETURN)) { - shared_ptr ret = make_shared(); - ret->setValue(parseAssignment()); + shared_ptr ret = make_shared(parent); + ret->setValue(parseAssignment(ret)); return ret; } - return parseStructDefinition(); + return parseStructDefinition(parent); } - shared_ptr parseStructDefinition() { + shared_ptr parseStructDefinition(shared_ptr parent) { if (match(Token::KEYWORD, Lexemes::STRUCT)) { match(Token::IDENTIFIER); - shared_ptr str = make_shared(currentToken.getLexeme()); + shared_ptr str = make_shared(currentToken.getLexeme(), parent); if (!match(Token::BRACE_OPEN)) { Theta::Compiler::getInstance().addException( @@ -162,7 +162,7 @@ namespace Theta { while (!match(Token::BRACE_CLOSE)) { match(Token::IDENTIFIER); - shared_ptr el = parseIdentifier(); + shared_ptr el = parseIdentifier(str); if (el == nullptr) break; @@ -174,67 +174,78 @@ namespace Theta { return str; } - return parseAssignment(); + return parseAssignment(parent); } - shared_ptr parseAssignment() { - shared_ptr expr = parseExpression(); + shared_ptr parseAssignment(shared_ptr parent) { + shared_ptr expr = parseExpression(parent); if (match(Token::ASSIGNMENT)) { shared_ptr left = expr; - expr = make_shared(); + expr = make_shared(parent); + + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseFunctionDeclaration()); + expr->setRight(parseFunctionDeclaration(expr)); } return expr; } - shared_ptr parseBlock() { + shared_ptr parseBlock(shared_ptr parent) { if (match(Token::BRACE_OPEN)) { vector> blockExpr; + shared_ptr block = make_shared(parent); while (!match(Token::BRACE_CLOSE)) { - shared_ptr expr = parseReturn(); + shared_ptr expr = parseReturn(block); if (expr == nullptr) break; blockExpr.push_back(expr); } - shared_ptr block = make_shared(); block->setElements(blockExpr); return block; } - return parseFunctionDeclaration(); + return parseFunctionDeclaration(parent); } - shared_ptr parseFunctionDeclaration() { - shared_ptr expr = parseAssignment(); + shared_ptr parseFunctionDeclaration(shared_ptr parent) { + shared_ptr expr = parseAssignment(parent); if (match(Token::FUNC_DECLARATION)) { - shared_ptr func_def = make_shared(); + shared_ptr func_def = make_shared(parent); if (expr && expr->getNodeType() != ASTNode::AST_NODE_LIST) { - shared_ptr parameters = make_shared(); + shared_ptr parameters = make_shared(func_def); + expr->setParent(parameters); + parameters->setElements({ expr }); expr = parameters; } else if (!expr) { - expr = make_shared(); + expr = make_shared(func_def); } - func_def->setParameters(dynamic_pointer_cast(expr)); + shared_ptr params = dynamic_pointer_cast(expr); + for (auto param : params->getElements()) { + param->setParent(params); + } + + func_def->setParameters(params); - shared_ptr definitionBlock = parseBlock(); + shared_ptr definitionBlock = parseBlock(func_def); // In the case of shorthand single-line function bodies, we still want to wrap them in a block within the ast // for scoping reasons if (definitionBlock->getNodeType() != ASTNode::BLOCK) { - shared_ptr block = make_shared(); + shared_ptr block = make_shared(func_def); + definitionBlock->setParent(block); block->setElements({ definitionBlock }); @@ -249,32 +260,32 @@ namespace Theta { return expr; } - shared_ptr parseExpression() { - return parseStructDeclaration(); + shared_ptr parseExpression(shared_ptr parent) { + return parseStructDeclaration(parent); } - shared_ptr parseStructDeclaration() { + shared_ptr parseStructDeclaration(shared_ptr parent) { if (match(Token::AT)) { match(Token::IDENTIFIER); - shared_ptr str = make_shared(currentToken.getLexeme()); + shared_ptr str = make_shared(currentToken.getLexeme(), parent); match(Token::BRACE_OPEN); - str->setValue(parseDict()); + str->setValue(parseDict(str)); return str; } - return parseEnum(); + return parseEnum(parent); } - shared_ptr parseEnum() { + shared_ptr parseEnum(shared_ptr parent) { if (match(Token::KEYWORD, Lexemes::ENUM)) { match(Token::IDENTIFIER); - shared_ptr root = make_shared(); - root->setIdentifier(parseIdentifier()); + shared_ptr root = make_shared(parent); + root->setIdentifier(parseIdentifier(root)); if (!match(Token::BRACE_OPEN)) { Theta::Compiler::getInstance().addException( @@ -309,7 +320,7 @@ namespace Theta { continue; } - shared_ptr node = parseSymbol(); + shared_ptr node = parseSymbol(root); if (!node) break; @@ -321,23 +332,23 @@ namespace Theta { return root; } - return parseControlFlow(); + return parseControlFlow(parent); } - shared_ptr parseControlFlow() { + shared_ptr parseControlFlow(shared_ptr parent) { if (match(Token::KEYWORD, Lexemes::IF)) { - shared_ptr cfNode = make_shared(); + shared_ptr cfNode = make_shared(parent); vector, shared_ptr>> conditionExpressionPairs = { - make_pair(parseExpression(), parseBlock()) + make_pair(parseExpression(cfNode), parseBlock(cfNode)) }; while (match(Token::KEYWORD, Lexemes::ELSE) && match(Token::KEYWORD, Lexemes::IF)) { - conditionExpressionPairs.push_back(make_pair(parseExpression(), parseBlock())); + conditionExpressionPairs.push_back(make_pair(parseExpression(cfNode), parseBlock(cfNode))); } // If we just matched an else but no if afterwards. This way it only matches one else block per control flow if (currentToken.getType() == Token::KEYWORD && currentToken.getLexeme() == Lexemes::ELSE) { - conditionExpressionPairs.push_back(make_pair(nullptr, parseBlock())); + conditionExpressionPairs.push_back(make_pair(nullptr, parseBlock(cfNode))); } cfNode->setConditionExpressionPairs(conditionExpressionPairs); @@ -345,49 +356,53 @@ namespace Theta { return cfNode; } - return parsePipeline(); + return parsePipeline(parent); } - shared_ptr parsePipeline() { - shared_ptr expr = parseBooleanComparison(); + shared_ptr parsePipeline(shared_ptr parent) { + shared_ptr expr = parseBooleanComparison(parent); while (match(Token::OPERATOR, Lexemes::PIPE)) { - expr = parseBooleanComparison(expr); + expr = parseBooleanComparison(parent, expr); } return expr; } - shared_ptr parseBooleanComparison(shared_ptr passedLeftArg = nullptr) { - shared_ptr expr = parseEquality(passedLeftArg); + shared_ptr parseBooleanComparison(shared_ptr parent, shared_ptr passedLeftArg = nullptr) { + shared_ptr expr = parseEquality(parent, passedLeftArg); while (match(Token::OPERATOR, Lexemes::OR) || match(Token::OPERATOR, Lexemes::AND)) { shared_ptr left = expr; - expr = make_shared(currentToken.getLexeme()); + expr = make_shared(currentToken.getLexeme(), parent); + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseExpression()); + expr->setRight(parseExpression(expr)); } return expr; } - shared_ptr parseEquality(shared_ptr passedLeftArg = nullptr) { - shared_ptr expr = parseComparison(passedLeftArg); + shared_ptr parseEquality(shared_ptrparent, shared_ptr passedLeftArg = nullptr) { + shared_ptr expr = parseComparison(parent, passedLeftArg); while (match(Token::OPERATOR, Lexemes::EQUALITY) || match(Token::OPERATOR, Lexemes::INEQUALITY)) { shared_ptr left = expr; - expr = make_shared(currentToken.getLexeme()); + expr = make_shared(currentToken.getLexeme(), parent); + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseComparison()); + expr->setRight(parseComparison(expr)); } return expr; } - shared_ptr parseComparison(shared_ptr passedLeftArg = nullptr) { - shared_ptr expr = parseTerm(passedLeftArg); + shared_ptr parseComparison(shared_ptr parent, shared_ptr passedLeftArg = nullptr) { + shared_ptr expr = parseTerm(parent, passedLeftArg); while ( match(Token::OPERATOR, Lexemes::GT) || @@ -397,30 +412,34 @@ namespace Theta { ) { shared_ptr left = expr; - expr = make_shared(currentToken.getLexeme()); + expr = make_shared(currentToken.getLexeme(), parent); + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseTerm()); + expr->setRight(parseTerm(expr)); } return expr; } - shared_ptr parseTerm(shared_ptr passedLeftArg = nullptr) { - shared_ptr expr = parseFactor(passedLeftArg); + shared_ptr parseTerm(shared_ptr parent, shared_ptr passedLeftArg = nullptr) { + shared_ptr expr = parseFactor(parent, passedLeftArg); while (match(Token::OPERATOR, Lexemes::MINUS) || match(Token::OPERATOR, Lexemes::PLUS)) { shared_ptr left = expr; - expr = make_shared(currentToken.getLexeme()); + expr = make_shared(currentToken.getLexeme(), parent); + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseFactor()); + expr->setRight(parseFactor(expr)); } return expr; } - shared_ptr parseFactor(shared_ptr passedLeftArg = nullptr) { - shared_ptr expr = parseExponent(passedLeftArg); + shared_ptr parseFactor(shared_ptr parent, shared_ptr passedLeftArg = nullptr) { + shared_ptr expr = parseExponent(parent, passedLeftArg); while ( match(Token::OPERATOR, Lexemes::DIVISION) || @@ -429,43 +448,47 @@ namespace Theta { ) { shared_ptr left = expr; - expr = make_shared(currentToken.getLexeme()); + expr = make_shared(currentToken.getLexeme(), parent); + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseExponent()); + expr->setRight(parseExponent(expr)); } return expr; } - shared_ptr parseExponent(shared_ptr passedLeftArg = nullptr) { - shared_ptr expr = parseUnary(passedLeftArg); + shared_ptr parseExponent(shared_ptrparent, shared_ptr passedLeftArg = nullptr) { + shared_ptr expr = parseUnary(parent, passedLeftArg); while (match(Token::OPERATOR, Lexemes::EXPONENT)) { shared_ptr left = expr; - expr = make_shared(currentToken.getLexeme()); + expr = make_shared(currentToken.getLexeme(), parent); + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseUnary()); + expr->setRight(parseUnary(expr)); } return expr; } - shared_ptr parseUnary(shared_ptr passedLeftArg = nullptr) { + shared_ptr parseUnary(shared_ptr parent, shared_ptr passedLeftArg = nullptr) { // Unary cant have a left arg, so if we get one passed in we can skip straight to primary if (!passedLeftArg && (match(Token::OPERATOR, Lexemes::NOT) || match(Token::OPERATOR, Lexemes::MINUS))) { - shared_ptr un = make_shared(currentToken.getLexeme()); - un->setValue(parseUnary(passedLeftArg)); + shared_ptr un = make_shared(currentToken.getLexeme(), parent); + un->setValue(parseUnary(passedLeftArg, un)); return un; } - return parsePrimary(passedLeftArg); + return parsePrimary(parent, passedLeftArg); } - shared_ptr parsePrimary(shared_ptr passedLeftArg = nullptr) { + shared_ptr parsePrimary(shared_ptr parent, shared_ptr passedLeftArg = nullptr) { if (match(Token::IDENTIFIER)) { - return parseFunctionInvocation(passedLeftArg); + return parseFunctionInvocation(parent, passedLeftArg); } if (passedLeftArg) return passedLeftArg; @@ -486,39 +509,42 @@ namespace Theta { value = value.substr(1, value.length() - 2); } - return make_shared(it->second, value); + return make_shared(it->second, value, parent); } if (match(Token::COLON)) { - return parseSymbol(); + return parseSymbol(parent); } if (match(Token::BRACKET_OPEN)) { - return parseList(); + return parseList(parent); } if (match(Token::BRACE_OPEN)) { - return parseDict(); + return parseDict(parent); } if (match(Token::PAREN_OPEN)) { - return parseExpressionList(); + return parseExpressionList(parent); } return nullptr; } - shared_ptr parseExpressionList(bool forceList = false) { - shared_ptr expr = parseFunctionDeclaration(); + shared_ptr parseExpressionList(shared_ptr parent, bool forceList = false) { + shared_ptr expr = parseFunctionDeclaration(parent); if (check(Token::COMMA) || !expr || forceList) { - shared_ptr nodeList = make_shared(); + shared_ptr nodeList = make_shared(parent); vector> expressions; - if (expr) expressions.push_back(expr); + if (expr) { + expr->setParent(nodeList); + expressions.push_back(expr); + } while (match(Token::COMMA)) { - expressions.push_back(parseFunctionDeclaration()); + expressions.push_back(parseFunctionDeclaration(nodeList)); } nodeList->setElements(expressions); @@ -531,20 +557,25 @@ namespace Theta { return expr; } - shared_ptr parseDict() { - pair> p = parseKvPair(); + shared_ptr parseDict(shared_ptr parent) { + pair> p = parseKvPair(parent); shared_ptr expr = p.second; if (p.first == "kv" && expr && expr->getNodeType() == ASTNode::TUPLE) { - vector> el; + vector> el; if (expr->getLeft()) el.push_back(expr); while (match(Token::COMMA)) { - el.push_back(parseKvPair().second); + el.push_back(parseKvPair(parent).second); + } + + expr = make_shared(parent); + + for (auto e : el) { + e->setParent(expr); } - expr = make_shared(); dynamic_pointer_cast(expr)->setElements(el); match(Token::BRACE_CLOSE); @@ -553,41 +584,43 @@ namespace Theta { return expr; } - pair> parseKvPair() { + pair> parseKvPair(shared_ptr parent) { // Because both flows of this function return a tuple, we need a type flag to indicate whether // we generated the tuple with the intention of it being a kvPair or not. Otherwise it would // be ambiguous and we would accidentally convert dicts with a single key-value pair into a tuple string type = "tuple"; - shared_ptr expr = parseTuple(); + shared_ptr expr = parseTuple(parent); if (match(Token::COLON)) { type = "kv"; shared_ptr left = expr; if (left->getNodeType() == ASTNode::IDENTIFIER) { - left = make_shared(dynamic_pointer_cast(left)->getIdentifier()); + left = make_shared(dynamic_pointer_cast(left)->getIdentifier(), expr); } - expr = make_shared(); + expr = make_shared(parent); + left->setParent(expr); + expr->setLeft(left); - expr->setRight(parseExpression()); + expr->setRight(parseExpression(expr)); } else if (expr == nullptr) { // parseTuplen will return a nullptr if it just immediately encounters a BRACE_CLOSE. We can treat this // as a dict since a valid tuple must have 2 values in it. type = "kv"; - expr = make_shared(); + expr = make_shared(parent); } return make_pair(type, expr); } - shared_ptr parseTuple() { + shared_ptr parseTuple(shared_ptr parent) { shared_ptr expr; if (match(Token::BRACE_CLOSE)) return nullptr; try { - expr = parseExpression(); + expr = parseExpression(parent); } catch (ParseError e) { if (e.getErrorParseType() == "symbol") remainingTokens->pop_front(); } @@ -595,11 +628,12 @@ namespace Theta { if (match(Token::COMMA)) { shared_ptr first = expr; - expr = make_shared(); + expr = make_shared(parent); + first->setParent(expr); expr->setLeft(first); try { - expr->setRight(parseExpression()); + expr->setRight(parseExpression(expr)); } catch (ParseError e) { if (e.getErrorParseType() == "symbol") remainingTokens->pop_front(); } @@ -621,15 +655,15 @@ namespace Theta { return expr; } - shared_ptr parseList() { - shared_ptr listNode = make_shared(); + shared_ptr parseList(shared_ptr parent) { + shared_ptr listNode = make_shared(parent); vector> el; if (!match(Token::BRACKET_CLOSE)) { - el.push_back(parseExpression()); + el.push_back(parseExpression(listNode)); while(match(Token::COMMA)) { - el.push_back(parseExpression()); + el.push_back(parseExpression(listNode)); } listNode->setElements(el); @@ -640,13 +674,14 @@ namespace Theta { return listNode; } - shared_ptr parseFunctionInvocation(shared_ptr passedLeftArg = nullptr) { - shared_ptr expr = parseIdentifier(); + shared_ptr parseFunctionInvocation(shared_ptr parent, shared_ptr passedLeftArg = nullptr) { + shared_ptr expr = parseIdentifier(parent); if (match(Token::PAREN_OPEN)) { - shared_ptr funcInvNode = make_shared(); + shared_ptr funcInvNode = make_shared(parent); + expr->setParent(funcInvNode); funcInvNode->setIdentifier(expr); - shared_ptr arguments = dynamic_pointer_cast(parseExpressionList(true)); + shared_ptr arguments = dynamic_pointer_cast(parseExpressionList(funcInvNode, true)); // This is used for pipeline operators pointing to function invocations. It takes the passed // left arg and sets it as the first argument to the function call @@ -662,13 +697,13 @@ namespace Theta { return expr; } - shared_ptr parseIdentifier() { + shared_ptr parseIdentifier(shared_ptr parent) { validateIdentifier(currentToken); - shared_ptr ident = make_shared(currentToken.getLexeme()); + shared_ptr ident = make_shared(currentToken.getLexeme(), parent); if (match(Token::OPERATOR, Lexemes::LT)) { - ident->setValue(parseType()); + ident->setValue(parseType(ident)); match(Token::OPERATOR, Lexemes::GT); } @@ -676,14 +711,14 @@ namespace Theta { return ident; } - shared_ptr parseType() { + shared_ptr parseType(shared_ptr parent) { match(Token::IDENTIFIER); string typeName = currentToken.getLexeme(); - shared_ptr typ = make_shared(typeName); + shared_ptr typ = make_shared(typeName, parent); if (match(Token::OPERATOR, Lexemes::LT)) { - shared_ptr l = parseType(); + shared_ptr l = parseType(typ); if (typeName == DataTypes::VARIADIC) { shared_ptr variadic = dynamic_pointer_cast(typ); @@ -691,13 +726,13 @@ namespace Theta { types.push_back(l); while (match(Token::COMMA)) { - types.push_back(parseType()); + types.push_back(parseType(typ)); } variadic->setElements(types); } else if (match(Token::COMMA)) { typ->setLeft(l); - typ->setRight(parseType()); + typ->setRight(parseType(typ)); } else { typ->setValue(l); } @@ -708,11 +743,11 @@ namespace Theta { return typ; } - shared_ptr parseSymbol() { + shared_ptr parseSymbol(shared_ptr parent) { if (match(Token::IDENTIFIER) || match(Token::NUMBER)) { if (currentToken.getType() == Token::IDENTIFIER) validateIdentifier(currentToken); - return make_shared(currentToken.getLexeme()); + return make_shared(currentToken.getLexeme(), parent); } Theta::Compiler::getInstance().addException( diff --git a/src/parser/ast/ASTNode.hpp b/src/parser/ast/ASTNode.hpp index c1df4b3..9374cc6 100644 --- a/src/parser/ast/ASTNode.hpp +++ b/src/parser/ast/ASTNode.hpp @@ -53,9 +53,10 @@ namespace Theta { shared_ptr left; shared_ptr right; shared_ptr resolvedType; + shared_ptr parent; int mappedBinaryenIndex; - ASTNode(ASTNode::Types type) : nodeType(type), value(nullptr) {}; + ASTNode(ASTNode::Types type, shared_ptr par) : nodeType(type), parent(par), value(nullptr) {}; virtual void setValue(shared_ptr childNode) { value = childNode; } virtual shared_ptr& getValue() { return value; } @@ -69,6 +70,9 @@ namespace Theta { virtual int getMappedBinaryenIndex() { return mappedBinaryenIndex; } virtual void setMappedBinaryenIndex(int idx) { mappedBinaryenIndex = idx; } + virtual void setParent(shared_ptr parentNode) { parent = parentNode; } + virtual shared_ptr& getParent() { return parent; } + void setResolvedType(shared_ptr typeNode) { resolvedType = typeNode; } shared_ptr getResolvedType() { return resolvedType; } diff --git a/src/parser/ast/ASTNodeList.hpp b/src/parser/ast/ASTNodeList.hpp index 8c5949a..efa8a39 100644 --- a/src/parser/ast/ASTNodeList.hpp +++ b/src/parser/ast/ASTNodeList.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -12,7 +13,7 @@ namespace Theta { public: vector> elements; - ASTNodeList(ASTNode::Types type = ASTNode::AST_NODE_LIST) : ASTNode(type) {}; + ASTNodeList(shared_ptr parent, ASTNode::Types type = ASTNode::AST_NODE_LIST) : ASTNode(type, parent) {}; void setElements(vector> el) { elements = el; } diff --git a/src/parser/ast/AssignmentNode.hpp b/src/parser/ast/AssignmentNode.hpp index 57c3894..a970440 100644 --- a/src/parser/ast/AssignmentNode.hpp +++ b/src/parser/ast/AssignmentNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -9,7 +10,7 @@ using namespace std; namespace Theta { class AssignmentNode : public ASTNode { public: - AssignmentNode() : ASTNode(ASTNode::ASSIGNMENT) {}; + AssignmentNode(shared_ptr parent) : ASTNode(ASTNode::ASSIGNMENT, parent) {}; string toJSON() const override { ostringstream oss; diff --git a/src/parser/ast/BinaryOperationNode.hpp b/src/parser/ast/BinaryOperationNode.hpp index 50d8cd8..c5eee36 100644 --- a/src/parser/ast/BinaryOperationNode.hpp +++ b/src/parser/ast/BinaryOperationNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -11,7 +12,7 @@ namespace Theta { public: string operatorSymbol; - BinaryOperationNode(string op) : operatorSymbol(op), ASTNode(ASTNode::BINARY_OPERATION) {}; + BinaryOperationNode(string op, shared_ptr parent) : ASTNode(ASTNode::BINARY_OPERATION, parent), operatorSymbol(op) {}; string getOperator() { return operatorSymbol; } diff --git a/src/parser/ast/BlockNode.hpp b/src/parser/ast/BlockNode.hpp index 6ac5f36..239e27a 100644 --- a/src/parser/ast/BlockNode.hpp +++ b/src/parser/ast/BlockNode.hpp @@ -1,7 +1,5 @@ #pragma once -#include -#include #include "ASTNode.hpp" #include "ASTNodeList.hpp" @@ -10,7 +8,7 @@ using namespace std; namespace Theta { class BlockNode : public ASTNodeList { public: - BlockNode() : ASTNodeList(ASTNode::BLOCK) {}; + BlockNode(shared_ptr parent) : ASTNodeList(parent, ASTNode::BLOCK) {}; bool hasOwnScope() override { return true; } }; diff --git a/src/parser/ast/CapsuleNode.hpp b/src/parser/ast/CapsuleNode.hpp index 5fe5d9e..5e7fb48 100644 --- a/src/parser/ast/CapsuleNode.hpp +++ b/src/parser/ast/CapsuleNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -11,7 +12,7 @@ namespace Theta { public: string name; - CapsuleNode(string n) : name(n), ASTNode(ASTNode::CAPSULE) {}; + CapsuleNode(string n, shared_ptr parent) : ASTNode(ASTNode::CAPSULE, parent), name(n) {}; string getName() { return name; } diff --git a/src/parser/ast/ControlFlowNode.hpp b/src/parser/ast/ControlFlowNode.hpp index 83968ed..fb36abb 100644 --- a/src/parser/ast/ControlFlowNode.hpp +++ b/src/parser/ast/ControlFlowNode.hpp @@ -12,7 +12,7 @@ namespace Theta { public: vector, shared_ptr>> conditionExpressionPairs; - ControlFlowNode() : ASTNode(ASTNode::CONTROL_FLOW) {}; + ControlFlowNode(shared_ptr parent) : ASTNode(ASTNode::CONTROL_FLOW, parent) {}; void setConditionExpressionPairs(vector, shared_ptr>> cnd) { conditionExpressionPairs = cnd; diff --git a/src/parser/ast/DictionaryNode.hpp b/src/parser/ast/DictionaryNode.hpp index 5d49ae3..5ca2db5 100644 --- a/src/parser/ast/DictionaryNode.hpp +++ b/src/parser/ast/DictionaryNode.hpp @@ -1,7 +1,5 @@ #pragma once -#include -#include #include "ASTNode.hpp" #include "ASTNodeList.hpp" @@ -10,6 +8,6 @@ using namespace std; namespace Theta { class DictionaryNode : public ASTNodeList { public: - DictionaryNode() : ASTNodeList(ASTNode::DICTIONARY) {}; + DictionaryNode(shared_ptr parent) : ASTNodeList(parent, ASTNode::DICTIONARY) {}; }; } diff --git a/src/parser/ast/EnumNode.hpp b/src/parser/ast/EnumNode.hpp index ef0040d..bb9d35a 100644 --- a/src/parser/ast/EnumNode.hpp +++ b/src/parser/ast/EnumNode.hpp @@ -13,7 +13,7 @@ namespace Theta { public: shared_ptr identifier; - EnumNode() : ASTNodeList(ASTNode::ENUM) {}; + EnumNode(shared_ptr parent) : ASTNodeList(parent, ASTNode::ENUM) {}; void setIdentifier(shared_ptr ident) { identifier = ident; } diff --git a/src/parser/ast/FunctionDeclarationNode.hpp b/src/parser/ast/FunctionDeclarationNode.hpp index 18096a8..8fb8256 100644 --- a/src/parser/ast/FunctionDeclarationNode.hpp +++ b/src/parser/ast/FunctionDeclarationNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -13,7 +14,7 @@ namespace Theta { shared_ptr parameters; shared_ptr definition; - FunctionDeclarationNode() : ASTNode(ASTNode::FUNCTION_DECLARATION) {}; + FunctionDeclarationNode(shared_ptr parent) : ASTNode(ASTNode::FUNCTION_DECLARATION, parent) {}; void setParameters(shared_ptr params) { parameters = params; } diff --git a/src/parser/ast/FunctionInvocationNode.hpp b/src/parser/ast/FunctionInvocationNode.hpp index 2278d3b..dc69841 100644 --- a/src/parser/ast/FunctionInvocationNode.hpp +++ b/src/parser/ast/FunctionInvocationNode.hpp @@ -12,7 +12,7 @@ using namespace std; namespace Theta { class FunctionInvocationNode : public ASTNode { public: - FunctionInvocationNode() : ASTNode(ASTNode::FUNCTION_INVOCATION) {}; + FunctionInvocationNode(shared_ptr parent) : ASTNode(ASTNode::FUNCTION_INVOCATION, parent) {}; shared_ptr identifier; shared_ptr arguments; diff --git a/src/parser/ast/IdentifierNode.hpp b/src/parser/ast/IdentifierNode.hpp index 6ff999e..2f30b20 100644 --- a/src/parser/ast/IdentifierNode.hpp +++ b/src/parser/ast/IdentifierNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -11,7 +12,7 @@ namespace Theta { public: string identifier; - IdentifierNode(string ident) : identifier(ident), ASTNode(ASTNode::IDENTIFIER) {}; + IdentifierNode(string ident, shared_ptr parent) : ASTNode(ASTNode::IDENTIFIER, parent), identifier(ident) {}; string getIdentifier() { return identifier; } diff --git a/src/parser/ast/LinkNode.hpp b/src/parser/ast/LinkNode.hpp index 88bb399..85ea8bc 100644 --- a/src/parser/ast/LinkNode.hpp +++ b/src/parser/ast/LinkNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -11,7 +12,7 @@ namespace Theta { public: string capsule; - LinkNode(string cap) : capsule(cap), ASTNode(ASTNode::LINK) {}; + LinkNode(string cap, shared_ptr parent) : ASTNode(ASTNode::LINK, parent), capsule(cap) {}; string toJSON() const override { ostringstream oss; diff --git a/src/parser/ast/ListNode.hpp b/src/parser/ast/ListNode.hpp index c395d51..480ca59 100644 --- a/src/parser/ast/ListNode.hpp +++ b/src/parser/ast/ListNode.hpp @@ -1,15 +1,14 @@ #pragma once -#include -#include #include "ASTNode.hpp" #include "ASTNodeList.hpp" +#include using namespace std; namespace Theta { class ListNode : public ASTNodeList { public: - ListNode() : ASTNodeList(ASTNode::LIST) {}; + ListNode(shared_ptr parent) : ASTNodeList(parent, ASTNode::LIST) {}; }; } diff --git a/src/parser/ast/LiteralNode.hpp b/src/parser/ast/LiteralNode.hpp index f4721bc..86e79d7 100644 --- a/src/parser/ast/LiteralNode.hpp +++ b/src/parser/ast/LiteralNode.hpp @@ -1,9 +1,9 @@ #pragma once +#include #include #include #include "ASTNode.hpp" -#include "../../lexer/Token.hpp" using namespace std; @@ -12,7 +12,7 @@ namespace Theta { public: string literalValue; - LiteralNode(ASTNode::Types typ, string val) : ASTNode(typ), literalValue(val) {}; + LiteralNode(ASTNode::Types typ, string val, shared_ptr parent) : ASTNode(typ, parent), literalValue(val) {}; string getLiteralValue() { return literalValue; } void setLiteralValue(string val) { literalValue = val; } diff --git a/src/parser/ast/ReturnNode.hpp b/src/parser/ast/ReturnNode.hpp index 2a8e2f2..548d9d4 100644 --- a/src/parser/ast/ReturnNode.hpp +++ b/src/parser/ast/ReturnNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -9,7 +10,7 @@ using namespace std; namespace Theta { class ReturnNode : public ASTNode { public: - ReturnNode() : ASTNode(ASTNode::RETURN) {}; + ReturnNode(shared_ptr parent) : ASTNode(ASTNode::RETURN, parent) {}; string toJSON() const override { ostringstream oss; diff --git a/src/parser/ast/SourceNode.hpp b/src/parser/ast/SourceNode.hpp index c0d2133..9e47654 100644 --- a/src/parser/ast/SourceNode.hpp +++ b/src/parser/ast/SourceNode.hpp @@ -11,7 +11,7 @@ namespace Theta { public: vector> links; - SourceNode() : ASTNode(ASTNode::SOURCE) {}; + SourceNode() : ASTNode(ASTNode::SOURCE, nullptr) {}; void setLinks(vector> ln) { links = ln; } diff --git a/src/parser/ast/StructDeclarationNode.hpp b/src/parser/ast/StructDeclarationNode.hpp index 242c667..b9c0e35 100644 --- a/src/parser/ast/StructDeclarationNode.hpp +++ b/src/parser/ast/StructDeclarationNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -11,7 +12,7 @@ namespace Theta { public: string structType; - StructDeclarationNode(string type) : structType(type), ASTNode(ASTNode::STRUCT_DECLARATION) {}; + StructDeclarationNode(string type, shared_ptr parent) : ASTNode(ASTNode::STRUCT_DECLARATION, parent), structType(type) {}; string getStructType() { return structType; } diff --git a/src/parser/ast/StructDefinitionNode.hpp b/src/parser/ast/StructDefinitionNode.hpp index a4c404a..df2989f 100644 --- a/src/parser/ast/StructDefinitionNode.hpp +++ b/src/parser/ast/StructDefinitionNode.hpp @@ -1,7 +1,7 @@ #pragma once +#include #include -#include #include "ASTNode.hpp" #include "ASTNodeList.hpp" @@ -12,7 +12,7 @@ namespace Theta { public: string name; - StructDefinitionNode(string n) : ASTNodeList(ASTNode::STRUCT_DEFINITION), name(n) {}; + StructDefinitionNode(string n, shared_ptr parent) : ASTNodeList(parent, ASTNode::STRUCT_DEFINITION), name(n) {}; string getName() { return name; } }; diff --git a/src/parser/ast/SymbolNode.hpp b/src/parser/ast/SymbolNode.hpp index 5b1a88b..de9c17e 100644 --- a/src/parser/ast/SymbolNode.hpp +++ b/src/parser/ast/SymbolNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -11,7 +12,7 @@ namespace Theta { public: string symbol; - SymbolNode(string sym) : symbol(":" + sym), ASTNode(ASTNode::SYMBOL) {}; + SymbolNode(string sym, shared_ptr parent) : ASTNode(ASTNode::SYMBOL, parent), symbol(":" + sym) {}; string getSymbol() { return symbol; } diff --git a/src/parser/ast/TupleNode.hpp b/src/parser/ast/TupleNode.hpp index d8d8596..9bb9683 100644 --- a/src/parser/ast/TupleNode.hpp +++ b/src/parser/ast/TupleNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -9,7 +10,7 @@ using namespace std; namespace Theta { class TupleNode : public ASTNode { public: - TupleNode() : ASTNode(ASTNode::TUPLE) {}; + TupleNode(shared_ptr parent) : ASTNode(ASTNode::TUPLE, parent) {}; string toJSON() const override { std::ostringstream oss; diff --git a/src/parser/ast/TypeDeclarationNode.hpp b/src/parser/ast/TypeDeclarationNode.hpp index 6af8bdf..493b90d 100644 --- a/src/parser/ast/TypeDeclarationNode.hpp +++ b/src/parser/ast/TypeDeclarationNode.hpp @@ -13,7 +13,7 @@ namespace Theta { public: string type; - TypeDeclarationNode(string typ) : type(typ), ASTNodeList(ASTNode::TYPE_DECLARATION) {}; + TypeDeclarationNode(string typ, shared_ptr parent) : ASTNodeList(parent, ASTNode::TYPE_DECLARATION), type(typ) {}; string getType() { return type; } diff --git a/src/parser/ast/UnaryOperationNode.hpp b/src/parser/ast/UnaryOperationNode.hpp index 358f577..58ce2a2 100644 --- a/src/parser/ast/UnaryOperationNode.hpp +++ b/src/parser/ast/UnaryOperationNode.hpp @@ -1,5 +1,6 @@ #pragma once +#include #include #include #include "ASTNode.hpp" @@ -11,7 +12,7 @@ namespace Theta { public: string operatorSymbol; - UnaryOperationNode(string op) : ASTNode(ASTNode::UNARY_OPERATION), operatorSymbol(op) {}; + UnaryOperationNode(string op, shared_ptr parent) : ASTNode(ASTNode::UNARY_OPERATION, parent), operatorSymbol(op) {}; string getOperator() { return operatorSymbol; } From 91d4537d7c5b4a6a114492182daebda1e15c3658 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 5 Aug 2024 00:15:10 -0400 Subject: [PATCH 03/38] not working but getting there on nested function simplification --- src/compiler/CodeGen.cpp | 75 +++++++++++++++++++++++++++++++++++++- src/compiler/CodeGen.hpp | 4 ++ src/parser/ast/ASTNode.cpp | 3 ++ src/parser/ast/ASTNode.hpp | 9 ++++- 4 files changed, 89 insertions(+), 2 deletions(-) create mode 100644 src/parser/ast/ASTNode.cpp diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 655a1b2..6485c39 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -1,7 +1,10 @@ +#include #include #include +#include #include #include +#include #include "binaryen-c.h" #include "compiler/Compiler.hpp" #include "lexer/Lexemes.hpp" @@ -9,6 +12,7 @@ #include "CodeGen.hpp" #include "DataTypes.hpp" #include "parser/ast/AssignmentNode.hpp" +#include "parser/ast/FunctionDeclarationNode.hpp" #include "parser/ast/IdentifierNode.hpp" #include "parser/ast/TypeDeclarationNode.hpp" @@ -126,12 +130,81 @@ namespace Theta { ); } - + generateClosure(dynamic_pointer_cast(assignmentNode->getRight()), module); // TODO: Functions will be defined as closures which take in the scope of the surrounding block as additional parameters throw new runtime_error("Lambda functions are not yet implemented."); } + // Transforms nested function declarations and generates an anonymous function in the function table + void CodeGen::generateClosure(shared_ptr fnDeclNode, BinaryenModuleRef &module) { + // Capture the outer scope + set requiredScopeIdentifiers; + set paramIdentifiers; + + cout << "GENERATING CLOSURE FOR FUNCTION, AST NODE ID: " << to_string(fnDeclNode->getId()) << endl; + + for (auto param : fnDeclNode->getParameters()->getElements()) { + paramIdentifiers.insert(dynamic_pointer_cast(param)->getIdentifier()); + } + + vector> identifiersInBody = Compiler::findAllInTree(fnDeclNode->getDefinition(), ASTNode::IDENTIFIER); + + for (auto ident : identifiersInBody) { + string identifierName = dynamic_pointer_cast(ident)->getIdentifier(); + + // Only add identifiers that are not present in the function params + if (paramIdentifiers.find(identifierName) != paramIdentifiers.end()) continue; + + // If an identifier is globally available we dont need to include it either + shared_ptr inScope = scope.lookup(identifierName); + if (inScope->getMappedBinaryenIndex() == -1) continue; + + requiredScopeIdentifiers.insert(identifierName); + } + + // Find any identifiers that were passed in as parameters + deque> identifiersFromParams = findParameterizedIdentifiersFromAncestors(fnDeclNode, requiredScopeIdentifiers); + + // If we've traversed the tree for parameters and we still have some missing identifiers, they must be defined in bodies + if (requiredScopeIdentifiers.size() > 0) { + + } + } + + deque> CodeGen::findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, deque> found) { + if (identifiersToFind.size() == 0 || node->getParent()->getNodeType() == ASTNode::CAPSULE) return found; + + cout << "TEEHEE" << endl; + cout << "PARENT IS: " << node->getParent()->toJSON() << endl; + + if (node->getParent()->getNodeType() != ASTNode::FUNCTION_DECLARATION) { + return findParameterizedIdentifiersFromAncestors(node->parent, identifiersToFind, found); + } + + shared_ptr parent = dynamic_pointer_cast(node->getParent()); + + unordered_map> paramIdentifiers; + + for (auto param : parent->getParameters()->getElements()) { + paramIdentifiers.insert(make_pair( + dynamic_pointer_cast(param)->getIdentifier(), + param + )); + } + + for (auto ident : identifiersToFind) { + auto param = paramIdentifiers.find(ident); + + if (param == paramIdentifiers.end()) continue; + + found.push_front(param->second); + identifiersToFind.erase(ident); + } + + return findParameterizedIdentifiersFromAncestors(parent, identifiersToFind, found); + } + BinaryenExpressionRef CodeGen::generateFunctionDeclaration( string identifier, shared_ptr fnDeclNode, diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 983b1d1..02ed78b 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -18,6 +18,7 @@ #include "parser/ast/FunctionInvocationNode.hpp" #include "parser/ast/ControlFlowNode.hpp" #include +#include #include using namespace std; @@ -32,6 +33,7 @@ namespace Theta { BinaryenExpressionRef generateBlock(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateReturn(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateFunctionDeclaration(string identifier, shared_ptr node, BinaryenModuleRef &module, bool addToExports = false); + void generateClosure(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateFunctionInvocation(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module); BinaryenExpressionRef generateIdentifier(shared_ptr node, BinaryenModuleRef &module); @@ -58,5 +60,7 @@ namespace Theta { void hoistCapsuleElements(vector> elements); void bindIdentifierToScope(shared_ptr ast); void registerModuleFunctions(BinaryenModuleRef &module); + + deque> findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, deque> found = {}); }; } diff --git a/src/parser/ast/ASTNode.cpp b/src/parser/ast/ASTNode.cpp new file mode 100644 index 0000000..cfa5c63 --- /dev/null +++ b/src/parser/ast/ASTNode.cpp @@ -0,0 +1,3 @@ +#include "ASTNode.hpp" + +int Theta::ASTNode::nextId = 0; diff --git a/src/parser/ast/ASTNode.hpp b/src/parser/ast/ASTNode.hpp index 9374cc6..a0abe4c 100644 --- a/src/parser/ast/ASTNode.hpp +++ b/src/parser/ast/ASTNode.hpp @@ -45,9 +45,11 @@ namespace Theta { UNARY_OPERATION }; + static int nextId; virtual ASTNode::Types getNodeType() { return nodeType; } virtual string getNodeTypePretty() const { return nodeTypeToString(nodeType); } virtual string toJSON() const = 0; + int id; ASTNode::Types nodeType; shared_ptr value; shared_ptr left; @@ -56,7 +58,12 @@ namespace Theta { shared_ptr parent; int mappedBinaryenIndex; - ASTNode(ASTNode::Types type, shared_ptr par) : nodeType(type), parent(par), value(nullptr) {}; + ASTNode(ASTNode::Types type, shared_ptr par) : nodeType(type), parent(par), value(nullptr) { + id = nextId; + nextId++; + }; + + virtual int getId() { return id; } virtual void setValue(shared_ptr childNode) { value = childNode; } virtual shared_ptr& getValue() { return value; } From cd65a0c76e9570c792667128a1afbc9f1322a25e Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 5 Aug 2024 19:41:33 -0400 Subject: [PATCH 04/38] properly fetch parameters from ancestor functions --- src/compiler/CodeGen.cpp | 39 +++++++++++++++++++++++++-------------- src/compiler/CodeGen.hpp | 2 +- 2 files changed, 26 insertions(+), 15 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 6485c39..35fc06a 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -1,4 +1,3 @@ -#include #include #include #include @@ -46,6 +45,8 @@ namespace Theta { return generateBlock(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::RETURN) { return generateReturn(dynamic_pointer_cast(node), module); + } else if (node->getNodeType() == ASTNode::FUNCTION_DECLARATION) { + generateClosure(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::FUNCTION_INVOCATION) { return generateFunctionInvocation(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::CONTROL_FLOW) { @@ -142,8 +143,6 @@ namespace Theta { set requiredScopeIdentifiers; set paramIdentifiers; - cout << "GENERATING CLOSURE FOR FUNCTION, AST NODE ID: " << to_string(fnDeclNode->getId()) << endl; - for (auto param : fnDeclNode->getParameters()->getElements()) { paramIdentifiers.insert(dynamic_pointer_cast(param)->getIdentifier()); } @@ -163,21 +162,30 @@ namespace Theta { requiredScopeIdentifiers.insert(identifierName); } - // Find any identifiers that were passed in as parameters - deque> identifiersFromParams = findParameterizedIdentifiersFromAncestors(fnDeclNode, requiredScopeIdentifiers); + // Find any identifiers that were passed in as parameters, those need to be included in the new parameter set + vector> closureParameters = findParameterizedIdentifiersFromAncestors(fnDeclNode, requiredScopeIdentifiers); + + closureParameters.insert( + closureParameters.end(), + fnDeclNode->getParameters()->getElements().begin(), + fnDeclNode->getParameters()->getElements().end() + ); // If we've traversed the tree for parameters and we still have some missing identifiers, they must be defined in bodies if (requiredScopeIdentifiers.size() > 0) { + cout << "I STILL NEED MORE! DIDNT FIND: "; + for (auto i : requiredScopeIdentifiers) { + cout << i << ", "; + } + // TODO: scan ancestors for code relating to set variables. make sure to trace rhs to check if it gets assigned from + // a parameter } } - deque> CodeGen::findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, deque> found) { + vector> CodeGen::findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, vector> found) { if (identifiersToFind.size() == 0 || node->getParent()->getNodeType() == ASTNode::CAPSULE) return found; - cout << "TEEHEE" << endl; - cout << "PARENT IS: " << node->getParent()->toJSON() << endl; - if (node->getParent()->getNodeType() != ASTNode::FUNCTION_DECLARATION) { return findParameterizedIdentifiersFromAncestors(node->parent, identifiersToFind, found); } @@ -193,13 +201,16 @@ namespace Theta { )); } - for (auto ident : identifiersToFind) { - auto param = paramIdentifiers.find(ident); + for (auto it = identifiersToFind.begin(); it != identifiersToFind.end();) { + auto param = paramIdentifiers.find(*it); - if (param == paramIdentifiers.end()) continue; + if (param == paramIdentifiers.end()) { + it++; + continue; + } - found.push_front(param->second); - identifiersToFind.erase(ident); + found.push_back(param->second); + it = identifiersToFind.erase(it); } return findParameterizedIdentifiersFromAncestors(parent, identifiersToFind, found); diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 02ed78b..3014902 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -61,6 +61,6 @@ namespace Theta { void bindIdentifierToScope(shared_ptr ast); void registerModuleFunctions(BinaryenModuleRef &module); - deque> findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, deque> found = {}); + vector> findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, vector> found = {}); }; } From e3eee952f17f8c6a6602c685880b9bab50f3615c Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 6 Aug 2024 00:05:01 -0400 Subject: [PATCH 05/38] start approach to collect closure scope for both params and body expressions at the same time --- src/compiler/CodeGen.cpp | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 35fc06a..72d37c2 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -179,10 +179,28 @@ namespace Theta { } // TODO: scan ancestors for code relating to set variables. make sure to trace rhs to check if it gets assigned from - // a parameter + // a parameter. I think we need to change the findParameterizedIdentifiersFromAncestors function to build + // out a list of params and function body statements at the same time, then return both } } + void CodeGen::collectClosureScope( + shared_ptr node, + set &identifiersToFind, + vector> ¶meters, + vector> &bodyExpressions + ) { + if (identifiersToFind.size() == 0 || node->getParent()->getNodeType() == ASTNode::CAPSULE) return; + + if (node->getParent()->getNodeType() != ASTNode::FUNCTION_DECLARATION) { + return collectClosureScope(node->parent, identifiersToFind, parameters, bodyExpressions); + } + + shared_ptr parent = dynamic_pointer_cast(node->getParent()); + + // TODO: finish + } + vector> CodeGen::findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, vector> found) { if (identifiersToFind.size() == 0 || node->getParent()->getNodeType() == ASTNode::CAPSULE) return found; From cf719747d34ff6960a7e1e66e50f5e9278be8d5d Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 6 Aug 2024 01:57:37 -0400 Subject: [PATCH 06/38] much closer. correctly finding dependent body expressions and parameters for closures --- src/compiler/CodeGen.cpp | 71 ++++++++++++++++++++++++++-------------- src/compiler/CodeGen.hpp | 2 +- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 72d37c2..dab7798 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -162,15 +162,32 @@ namespace Theta { requiredScopeIdentifiers.insert(identifierName); } - // Find any identifiers that were passed in as parameters, those need to be included in the new parameter set - vector> closureParameters = findParameterizedIdentifiersFromAncestors(fnDeclNode, requiredScopeIdentifiers); + vector> closureParameters; + vector> closureExpressions; + collectClosureScope(fnDeclNode, requiredScopeIdentifiers, closureParameters, closureExpressions); + + // The collectClosureScope function will collect the parameters in reverse order, in order to preserve the order + // in which the params are passed throughout the ancestry path, so we need to reverse it back here to get the + // correct order + reverse(closureParameters.begin(), closureParameters.end()); + closureParameters.insert( closureParameters.end(), fnDeclNode->getParameters()->getElements().begin(), fnDeclNode->getParameters()->getElements().end() ); + cout << "Closure Params are, in order of left to right: "; + for (auto param : closureParameters) { + cout << param->toJSON() << ", "; + } + + cout << endl << "Closure expressions, in order of first to last: " << endl; + for (auto expr : closureExpressions) { + cout << expr->toJSON() << endl; + } + // If we've traversed the tree for parameters and we still have some missing identifiers, they must be defined in bodies if (requiredScopeIdentifiers.size() > 0) { cout << "I STILL NEED MORE! DIDNT FIND: "; @@ -198,40 +215,44 @@ namespace Theta { shared_ptr parent = dynamic_pointer_cast(node->getParent()); - // TODO: finish - } + vector> parentExpressions = dynamic_pointer_cast(parent->getDefinition())->getElements(); - vector> CodeGen::findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, vector> found) { - if (identifiersToFind.size() == 0 || node->getParent()->getNodeType() == ASTNode::CAPSULE) return found; + // Go through the parent expressions backwards so that we can collect dependencies and resolve them in one pass + for (int i = parentExpressions.size() - 1; i >= 0; i--) { + shared_ptr expr = parentExpressions.at(i); - if (node->getParent()->getNodeType() != ASTNode::FUNCTION_DECLARATION) { - return findParameterizedIdentifiersFromAncestors(node->parent, identifiersToFind, found); - } + if (expr->getNodeType() != ASTNode::ASSIGNMENT) continue; - shared_ptr parent = dynamic_pointer_cast(node->getParent()); + string identifier = dynamic_pointer_cast(expr->getLeft())->getIdentifier(); + + auto identExpr = identifiersToFind.find(identifier); - unordered_map> paramIdentifiers; + if (identExpr == identifiersToFind.end()) continue; + + bodyExpressions.push_back(expr); + identifiersToFind.erase(identifier); - for (auto param : parent->getParameters()->getElements()) { - paramIdentifiers.insert(make_pair( - dynamic_pointer_cast(param)->getIdentifier(), - param - )); + // This expression we just found might depend on other identifiers, in which case we need to copy those over too + vector> dependentIdentifiers = Compiler::findAllInTree(expr->getRight(), ASTNode::IDENTIFIER); + for (auto ident : dependentIdentifiers) { + identifiersToFind.insert(dynamic_pointer_cast(ident)->getIdentifier()); + } } + + // Go through the parameters backwards so we can preserve their order if we ascend further into the ancestors. This + // will get reversed at the end + for (int i = parent->getParameters()->getElements().size() - 1; i >= 0; i--) { + shared_ptr ident = dynamic_pointer_cast(parent->getParameters()->getElements().at(i)); - for (auto it = identifiersToFind.begin(); it != identifiersToFind.end();) { - auto param = paramIdentifiers.find(*it); + auto identNeeded = identifiersToFind.find(ident->getIdentifier()); - if (param == paramIdentifiers.end()) { - it++; - continue; - } + if (identNeeded == identifiersToFind.end()) continue; - found.push_back(param->second); - it = identifiersToFind.erase(it); + parameters.push_back(ident); + identifiersToFind.erase(ident->getIdentifier()); } - return findParameterizedIdentifiersFromAncestors(parent, identifiersToFind, found); + collectClosureScope(parent, identifiersToFind, parameters, bodyExpressions); } BinaryenExpressionRef CodeGen::generateFunctionDeclaration( diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 3014902..b718a1f 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -61,6 +61,6 @@ namespace Theta { void bindIdentifierToScope(shared_ptr ast); void registerModuleFunctions(BinaryenModuleRef &module); - vector> findParameterizedIdentifiersFromAncestors(shared_ptr node, set &identifiersToFind, vector> found = {}); + void collectClosureScope(shared_ptr node, set &identifiersToFind, vector> ¶meters, vector> &bodyExpressions); }; } From 4f3f128bc4160e51bf7b7c57a7e8e2980de8d4e5 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 7 Aug 2024 21:15:11 -0400 Subject: [PATCH 07/38] properly transforms recursive functions into closures and sets their return value as a function pointer --- src/cli/cli.cpp | 8 +- src/compiler/CodeGen.cpp | 180 +++++++++++++++++++++++------------ src/compiler/CodeGen.hpp | 4 +- src/compiler/Compiler.cpp | 21 ++++ src/compiler/Compiler.hpp | 9 ++ src/compiler/TypeChecker.cpp | 25 +---- src/compiler/TypeChecker.hpp | 9 -- 7 files changed, 160 insertions(+), 96 deletions(-) diff --git a/src/cli/cli.cpp b/src/cli/cli.cpp index ced0d42..e09d8c5 100644 --- a/src/cli/cli.cpp +++ b/src/cli/cli.cpp @@ -62,6 +62,10 @@ namespace Theta { Theta::Compiler::getInstance().compile(sourceFile, outFile, isEmitTokens, isEmitAST, isEmitWAT); } + static string makeLink(string url, string text = "") { + return "\x1B]8;;" + url + "\x1B\\" + (text != "" ? text : url) + "\x1B]8;;\x1B\\"; + } + private: static void printUsageInstructions() { cout << "Theta Language Compiler CLI" << endl; @@ -121,9 +125,5 @@ namespace Theta { cout << endl << endl << "Exiting ITH..." << endl; } - - static string makeLink(string url, string text = "") { - return "\x1B]8;;" + url + "\x1B\\" + (text != "" ? text : url) + "\x1B]8;;\x1B\\"; - } }; } diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index dab7798..e584221 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -10,10 +10,13 @@ #include "StandardLibrary.hpp" #include "CodeGen.hpp" #include "DataTypes.hpp" +#include "parser/ast/ASTNodeList.hpp" #include "parser/ast/AssignmentNode.hpp" +#include "parser/ast/BlockNode.hpp" #include "parser/ast/FunctionDeclarationNode.hpp" #include "parser/ast/IdentifierNode.hpp" #include "parser/ast/TypeDeclarationNode.hpp" +#include "cli/CLI.cpp" namespace Theta { BinaryenModuleRef CodeGen::generateWasmFromAST(shared_ptr ast) { @@ -104,6 +107,13 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateAssignment(shared_ptr assignmentNode, BinaryenModuleRef &module) { string assignmentIdentifier = dynamic_pointer_cast(assignmentNode->getLeft())->getIdentifier(); + // Using a space in scope for an idx counter so we dont have to have a whole separate stack just to keep track of the current + // local idx + shared_ptr currentIdentIdx = dynamic_pointer_cast(scope.lookup(LOCAL_IDX_SCOPE_KEY)); + int idxOfAssignment = stoi(currentIdentIdx->getLiteralValue()); + + currentIdentIdx->setLiteralValue(to_string(idxOfAssignment + 1)); + scope.insert(LOCAL_IDX_SCOPE_KEY, currentIdentIdx); // Function declarations dont get generated generically like the rest of the AST elements, they are not part of the "generate" method, // because they behave differently depending on where the function was declared. A function declared at the top level of capsule will @@ -112,14 +122,6 @@ namespace Theta { // A function declared within another function body OR within any other structure will be turned into a closure that contains the scope // of anything outside of that function. if (assignmentNode->getRight()->getNodeType() != ASTNode::FUNCTION_DECLARATION) { - // Using a space in scope for an idx counter so we dont have to have a whole separate stack just to keep track of the current - // local idx - shared_ptr currentIdentIdx = dynamic_pointer_cast(scope.lookup(LOCAL_IDX_SCOPE_KEY)); - int idxOfAssignment = stoi(currentIdentIdx->getLiteralValue()); - - currentIdentIdx->setLiteralValue(to_string(idxOfAssignment + 1)); - scope.insert(LOCAL_IDX_SCOPE_KEY, currentIdentIdx); - shared_ptr assignmentRhs = assignmentNode->getRight(); assignmentRhs->setMappedBinaryenIndex(idxOfAssignment); scope.insert(assignmentIdentifier, assignmentRhs); @@ -131,14 +133,41 @@ namespace Theta { ); } - generateClosure(dynamic_pointer_cast(assignmentNode->getRight()), module); + shared_ptr closure = generateClosure( + dynamic_pointer_cast(assignmentNode->getRight()), + module + ); + + string closureName = generateFunctionHash(closure); + + generateFunctionDeclaration( + closureName, + closure, + module + ); + + string qualifiedClosureName = Compiler::getQualifiedFunctionIdentifier( + closureName, + dynamic_pointer_cast(closure) + ); + + int functionIndex = functionNameToClosureMap.find(qualifiedClosureName)->second.getFunctionIndex(); + + closure->setMappedBinaryenIndex(idxOfAssignment); + scope.insert(assignmentIdentifier, closure); - // TODO: Functions will be defined as closures which take in the scope of the surrounding block as additional parameters - throw new runtime_error("Lambda functions are not yet implemented."); + return BinaryenLocalSet( + module, + idxOfAssignment, + BinaryenConst( + module, + BinaryenLiteralInt32(functionIndex) + ) + ); } // Transforms nested function declarations and generates an anonymous function in the function table - void CodeGen::generateClosure(shared_ptr fnDeclNode, BinaryenModuleRef &module) { + shared_ptr CodeGen::generateClosure(shared_ptr fnDeclNode, BinaryenModuleRef &module) { // Capture the outer scope set requiredScopeIdentifiers; set paramIdentifiers; @@ -161,6 +190,13 @@ namespace Theta { requiredScopeIdentifiers.insert(identifierName); } + + shared_ptr closure = make_shared(nullptr); + closure->setResolvedType(Compiler::deepCopyTypeDeclaration( + dynamic_pointer_cast(fnDeclNode->getResolvedType()), + closure + )); + vector> closureParameters; vector> closureExpressions; @@ -171,34 +207,58 @@ namespace Theta { // in which the params are passed throughout the ancestry path, so we need to reverse it back here to get the // correct order reverse(closureParameters.begin(), closureParameters.end()); - + reverse(closureExpressions.begin(), closureExpressions.end()); + closureParameters.insert( closureParameters.end(), fnDeclNode->getParameters()->getElements().begin(), fnDeclNode->getParameters()->getElements().end() ); - cout << "Closure Params are, in order of left to right: "; - for (auto param : closureParameters) { - cout << param->toJSON() << ", "; - } + vector> originalFnExpressions = dynamic_pointer_cast(fnDeclNode->getDefinition())->getElements(); - cout << endl << "Closure expressions, in order of first to last: " << endl; - for (auto expr : closureExpressions) { - cout << expr->toJSON() << endl; - } + closureExpressions.insert( + closureExpressions.end(), + originalFnExpressions.begin(), + originalFnExpressions.end() + ); + + shared_ptr parametersNode = make_shared(closure); + parametersNode->setElements(closureParameters); + closure->setParameters(parametersNode); + shared_ptr closureBody = make_shared(closure); + closureBody->setElements(closureExpressions); + closure->setDefinition(closureBody); + // If we've traversed the tree for parameters and we still have some missing identifiers, they must be defined in bodies if (requiredScopeIdentifiers.size() > 0) { - cout << "I STILL NEED MORE! DIDNT FIND: "; - for (auto i : requiredScopeIdentifiers) { - cout << i << ", "; + cout << "\033[1;31mFATAL ERROR: Could not locate necessary closure identifiers!\033[0m" << endl; + cout << " Missed identifiers: "; + for (int i = 0; i < requiredScopeIdentifiers.size(); i++) { + if (i > 0) cout << ", "; + cout << i; } + cout << endl << "This error is not caused by your code, but rather an issue with the compiler itself. Please report an issue at " << CLI::makeLink("https://github.com/alexdovzhanyn/ThetaLang/issues"); - // TODO: scan ancestors for code relating to set variables. make sure to trace rhs to check if it gets assigned from - // a parameter. I think we need to change the findParameterizedIdentifiersFromAncestors function to build - // out a list of params and function body statements at the same time, then return both + exit(1); } + + return closure; + } + + string CodeGen::generateFunctionHash(shared_ptr function) { + hash hasher; + + size_t hashed = hasher(function->toJSON()); + + ostringstream stream; + + stream << hex << nouppercase << setw(sizeof(size_t) * 2) << setfill('0'); + + stream << hashed; + + return stream.str(); } void CodeGen::collectClosureScope( @@ -209,50 +269,49 @@ namespace Theta { ) { if (identifiersToFind.size() == 0 || node->getParent()->getNodeType() == ASTNode::CAPSULE) return; - if (node->getParent()->getNodeType() != ASTNode::FUNCTION_DECLARATION) { - return collectClosureScope(node->parent, identifiersToFind, parameters, bodyExpressions); - } - - shared_ptr parent = dynamic_pointer_cast(node->getParent()); + if (node->getParent()->getNodeType() == ASTNode::BLOCK) { + vector> parentExpressions = dynamic_pointer_cast(node->getParent())->getElements(); - vector> parentExpressions = dynamic_pointer_cast(parent->getDefinition())->getElements(); + // Go through the parent expressions backwards so that we can collect dependencies and resolve them in one pass + // in case this expression relies on one before it + for (int i = parentExpressions.size() - 1; i >= 0; i--) { + shared_ptr expr = parentExpressions.at(i); - // Go through the parent expressions backwards so that we can collect dependencies and resolve them in one pass - for (int i = parentExpressions.size() - 1; i >= 0; i--) { - shared_ptr expr = parentExpressions.at(i); + if (expr->getNodeType() != ASTNode::ASSIGNMENT) continue; - if (expr->getNodeType() != ASTNode::ASSIGNMENT) continue; + string identifier = dynamic_pointer_cast(expr->getLeft())->getIdentifier(); + + auto identExpr = identifiersToFind.find(identifier); - string identifier = dynamic_pointer_cast(expr->getLeft())->getIdentifier(); + if (identExpr == identifiersToFind.end()) continue; - auto identExpr = identifiersToFind.find(identifier); + bodyExpressions.push_back(expr); + identifiersToFind.erase(identifier); - if (identExpr == identifiersToFind.end()) continue; - - bodyExpressions.push_back(expr); - identifiersToFind.erase(identifier); - - // This expression we just found might depend on other identifiers, in which case we need to copy those over too - vector> dependentIdentifiers = Compiler::findAllInTree(expr->getRight(), ASTNode::IDENTIFIER); - for (auto ident : dependentIdentifiers) { - identifiersToFind.insert(dynamic_pointer_cast(ident)->getIdentifier()); + // This expression we just found might depend on other identifiers, in which case we need to copy those over too + vector> dependentIdentifiers = Compiler::findAllInTree(expr->getRight(), ASTNode::IDENTIFIER); + for (auto ident : dependentIdentifiers) { + identifiersToFind.insert(dynamic_pointer_cast(ident)->getIdentifier()); + } } - } - - // Go through the parameters backwards so we can preserve their order if we ascend further into the ancestors. This - // will get reversed at the end - for (int i = parent->getParameters()->getElements().size() - 1; i >= 0; i--) { - shared_ptr ident = dynamic_pointer_cast(parent->getParameters()->getElements().at(i)); + } else if (node->getParent()->getNodeType() == ASTNode::FUNCTION_DECLARATION) { + shared_ptr parent = dynamic_pointer_cast(node->getParent()); + + // Go through the parameters backwards so we can preserve their order if we ascend further into the ancestors. This + // will get reversed at the end + for (int i = parent->getParameters()->getElements().size() - 1; i >= 0; i--) { + shared_ptr ident = dynamic_pointer_cast(parent->getParameters()->getElements().at(i)); - auto identNeeded = identifiersToFind.find(ident->getIdentifier()); + auto identNeeded = identifiersToFind.find(ident->getIdentifier()); - if (identNeeded == identifiersToFind.end()) continue; + if (identNeeded == identifiersToFind.end()) continue; - parameters.push_back(ident); - identifiersToFind.erase(ident->getIdentifier()); + parameters.push_back(ident); + identifiersToFind.erase(ident->getIdentifier()); + } } - collectClosureScope(parent, identifiersToFind, parameters, bodyExpressions); + collectClosureScope(node->getParent(), identifiersToFind, parameters, bodyExpressions); } BinaryenExpressionRef CodeGen::generateFunctionDeclaration( @@ -571,6 +630,9 @@ namespace Theta { if (typeDeclaration->getType() == DataTypes::NUMBER) return BinaryenTypeInt64(); if (typeDeclaration->getType() == DataTypes::STRING) return BinaryenTypeStringref(); if (typeDeclaration->getType() == DataTypes::BOOLEAN) return BinaryenTypeInt32(); + + // Function references are returned as i32 pointers to a closure in the function table + if (typeDeclaration->getType() == DataTypes::FUNCTION) return BinaryenTypeInt32(); } void CodeGen::hoistCapsuleElements(vector> elements) { diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index b718a1f..264668c 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -33,7 +33,7 @@ namespace Theta { BinaryenExpressionRef generateBlock(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateReturn(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateFunctionDeclaration(string identifier, shared_ptr node, BinaryenModuleRef &module, bool addToExports = false); - void generateClosure(shared_ptr node, BinaryenModuleRef &module); + shared_ptr generateClosure(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateFunctionInvocation(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module); BinaryenExpressionRef generateIdentifier(shared_ptr node, BinaryenModuleRef &module); @@ -62,5 +62,7 @@ namespace Theta { void registerModuleFunctions(BinaryenModuleRef &module); void collectClosureScope(shared_ptr node, set &identifiersToFind, vector> ¶meters, vector> &bodyExpressions); + + string generateFunctionHash(shared_ptr function); }; } diff --git a/src/compiler/Compiler.cpp b/src/compiler/Compiler.cpp index ea974e1..b82b33f 100644 --- a/src/compiler/Compiler.cpp +++ b/src/compiler/Compiler.cpp @@ -284,4 +284,25 @@ namespace Theta { return {}; } + + shared_ptr Compiler::deepCopyTypeDeclaration(shared_ptr original, shared_ptr parent) { + shared_ptr copy = make_shared(original->getType(), parent); + + if (original->getValue()) { + copy->setValue(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getValue()), copy)); + } else if (original->getLeft()) { + copy->setLeft(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getLeft()), copy)); + copy->setRight(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getRight()), copy)); + } else if (original->getElements().size() > 0) { + vector> copyChildren; + + for (int i = 0; i < original->getElements().size(); i++) { + copyChildren.push_back(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getElements().at(i)), copy)); + } + + copy->setElements(copyChildren); + } + + return copy; + } } diff --git a/src/compiler/Compiler.hpp b/src/compiler/Compiler.hpp index 7769afd..3dd9750 100644 --- a/src/compiler/Compiler.hpp +++ b/src/compiler/Compiler.hpp @@ -120,6 +120,15 @@ namespace Theta { */ static vector> findAllInTree(shared_ptr node, ASTNode::Types type); + /** + * @brief Creates a deep copy of a type declaration node, useful for cases where type information + * needs to be duplicated without referencing the original. + * + * @param original The original type declaration node to copy. + * @return shared_ptr The deep-copied type declaration node. + */ + static shared_ptr deepCopyTypeDeclaration(shared_ptr node, shared_ptr parent); + shared_ptr> filesByCapsuleName; private: /** diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index 2881dd8..a77e987 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -638,7 +638,7 @@ namespace Theta { // Initially set the function resolvedType to whatever the identifier type is specified. This will get // updated later when we actually typecheck the function definition to whatever types the function actually returns. // This way, we support recursive function type resolution and cyclic function type resolution - node->getRight()->setResolvedType(deepCopyTypeDeclaration(dynamic_pointer_cast(ident->getValue()), node)); + node->getRight()->setResolvedType(Compiler::deepCopyTypeDeclaration(dynamic_pointer_cast(ident->getValue()), node)); capsuleDeclarationsTable.insert(uniqueFuncIdentifier, node->getRight()); } @@ -837,28 +837,7 @@ namespace Theta { } return functionIdentifier; - } - - shared_ptr TypeChecker::deepCopyTypeDeclaration(shared_ptr original, shared_ptr parent) { - shared_ptr copy = make_shared(original->getType(), parent); - - if (original->getValue()) { - copy->setValue(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getValue()), copy)); - } else if (original->getLeft()) { - copy->setLeft(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getLeft()), copy)); - copy->setRight(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getRight()), copy)); - } else if (original->getElements().size() > 0) { - vector> copyChildren; - - for (int i = 0; i < original->getElements().size(); i++) { - copyChildren.push_back(deepCopyTypeDeclaration(dynamic_pointer_cast(original->getElements().at(i)), copy)); - } - - copy->setElements(copyChildren); - } - - return copy; - } + } shared_ptr TypeChecker::lookupInScope(string identifierName) { shared_ptr foundInCapsule = capsuleDeclarationsTable.lookup(identifierName); diff --git a/src/compiler/TypeChecker.hpp b/src/compiler/TypeChecker.hpp index 534f66d..62614d2 100644 --- a/src/compiler/TypeChecker.hpp +++ b/src/compiler/TypeChecker.hpp @@ -283,14 +283,5 @@ namespace Theta { * @return string The unique identifier for the function. */ static string getDeterministicFunctionIdentifier(string variableName, shared_ptr declarationNode); - - /** - * @brief Creates a deep copy of a type declaration node, useful for cases where type information - * needs to be duplicated without referencing the original. - * - * @param original The original type declaration node to copy. - * @return shared_ptr The deep-copied type declaration node. - */ - static shared_ptr deepCopyTypeDeclaration(shared_ptr node, shared_ptr parent); }; } From 9626126fd391bed669ebfb0c6e7f7e409fff892f Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 8 Aug 2024 21:14:56 -0400 Subject: [PATCH 08/38] rewrite type parsing to allow for function declarations to have type annotations for parameters. not fully working --- src/compiler/TypeChecker.cpp | 67 +++++++++++++++++++++++++++++++----- src/compiler/TypeChecker.hpp | 9 +++++ src/parser/Parser.cpp | 22 +++++------- 3 files changed, 75 insertions(+), 23 deletions(-) diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index a77e987..6c97296 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -161,8 +161,15 @@ namespace Theta { // Function names can be overloaded, so functions don't need this check if (rhsType == DataTypes::FUNCTION) { - string uniqueFuncIdentifier = getDeterministicFunctionIdentifier(ident->getIdentifier(), node->getRight()); - + string uniqueFuncIdentifier = getDeterministicFunctionIdentifier( + ident->getIdentifier(), + (node->getRight()->getNodeType() == ASTNode::FUNCTION_DECLARATION + ? node->getRight() + // We're assigning to the result of a function invocation, and that invocation returns a function + : node->getRight()->getResolvedType() + ) + ); + shared_ptr existingFuncIdentifierInScope = identifierTable.lookup(uniqueFuncIdentifier); if (existingFuncIdentifierInScope) { @@ -298,18 +305,30 @@ namespace Theta { paramScopeBindings.push_back(make_pair(ident->getIdentifier(), ident->getValue())); } } - + bool valid = checkAST(node->getDefinition(), paramScopeBindings); if (!valid) return false; + shared_ptr funcType = make_shared(DataTypes::FUNCTION, node); + + vector> typeValues; + for (auto param : node->getParameters()->getElements()) { + typeValues.push_back(Compiler::deepCopyTypeDeclaration(dynamic_pointer_cast(param->getValue()), funcType)); + } + + typeValues.push_back(node->getDefinition()->getResolvedType()); + + funcType->setElements(typeValues); + // A function might already have a resolvedType if it was hoisted, we need to redefine it with the real return type if (node->getResolvedType()) { - node->getResolvedType()->setValue(node->getDefinition()->getResolvedType()); + if (typeValues.size() == 1) { + node->getResolvedType()->setValue(typeValues.at(0)); + } else { + dynamic_pointer_cast(node->getResolvedType())->setElements(typeValues); + } } else { - shared_ptr funcType = make_shared(DataTypes::FUNCTION, node); - funcType->setValue(node->getDefinition()->getResolvedType()); - node->setResolvedType(funcType); } @@ -329,12 +348,21 @@ namespace Theta { shared_ptr referencedFunction = lookupInScope(uniqueFuncIdentifier); if (!referencedFunction) { + cout << "shleem: " << uniqueFuncIdentifier << endl; + Compiler::getInstance().addException(make_shared(funcIdentifier)); return false; } - node->setResolvedType(referencedFunction->getResolvedType()->getValue()); - + shared_ptr referencedFunctionType = dynamic_pointer_cast(referencedFunction->getResolvedType()); + + // The function return type is the last element in the types list + if (referencedFunctionType->getValue()) { + node->setResolvedType(referencedFunctionType->getValue()); + } else { + node->setResolvedType(referencedFunctionType->getElements().back()); + } + return true; } @@ -819,6 +847,8 @@ namespace Theta { if (node->getNodeType() == ASTNode::FUNCTION_DECLARATION) { shared_ptr declarationNode = dynamic_pointer_cast(node); params = declarationNode->getParameters()->getElements(); + } else if (node->getNodeType() == ASTNode::TYPE_DECLARATION) { + return getDeterministicFunctionIdentifierFromTypeSignature(variableName, dynamic_pointer_cast(node)); } else { shared_ptr invocationNode = dynamic_pointer_cast(node); params = invocationNode->getParameters()->getElements(); @@ -839,6 +869,25 @@ namespace Theta { return functionIdentifier; } + string TypeChecker::getDeterministicFunctionIdentifierFromTypeSignature(string variableName, shared_ptr typeSig) { + vector> params; + + // If typeSig has a value, that means the function takes in no parameters and only has a return value + if (typeSig->getValue() == nullptr) { + params.resize(typeSig->getElements().size() - 1); + copy(typeSig->getElements().begin(), typeSig->getElements().end() - 1, params.begin()); + } + + string functionIdentifier = variableName + to_string(params.size()); + + for (auto param : params) { + shared_ptr p = dynamic_pointer_cast(param); + functionIdentifier += p->getType(); + } + + return functionIdentifier; + } + shared_ptr TypeChecker::lookupInScope(string identifierName) { shared_ptr foundInCapsule = capsuleDeclarationsTable.lookup(identifierName); shared_ptr foundInLocalScope = identifierTable.lookup(identifierName); diff --git a/src/compiler/TypeChecker.hpp b/src/compiler/TypeChecker.hpp index 62614d2..763ca25 100644 --- a/src/compiler/TypeChecker.hpp +++ b/src/compiler/TypeChecker.hpp @@ -283,5 +283,14 @@ namespace Theta { * @return string The unique identifier for the function. */ static string getDeterministicFunctionIdentifier(string variableName, shared_ptr declarationNode); + + /** + * @brief Generates a unique function identifier based on the function's name and its type signature + * + * @param variableName The base name of the function. + * @param typeSig The type signature of the function. + * @return string The unique identifier for the function. + */ + static string getDeterministicFunctionIdentifierFromTypeSignature(string variableName, shared_ptr typeSig); }; } diff --git a/src/parser/Parser.cpp b/src/parser/Parser.cpp index 53dbcc2..296de6d 100644 --- a/src/parser/Parser.cpp +++ b/src/parser/Parser.cpp @@ -718,23 +718,17 @@ namespace Theta { shared_ptr typ = make_shared(typeName, parent); if (match(Token::OPERATOR, Lexemes::LT)) { - shared_ptr l = parseType(typ); + shared_ptr typeDecl = dynamic_pointer_cast(typ); + vector> types = { parseType(typeDecl) }; - if (typeName == DataTypes::VARIADIC) { - shared_ptr variadic = dynamic_pointer_cast(typ); - vector> types; - types.push_back(l); - - while (match(Token::COMMA)) { - types.push_back(parseType(typ)); - } + while (match(Token::COMMA)) { + types.push_back(parseType(typeDecl)); + } - variadic->setElements(types); - } else if (match(Token::COMMA)) { - typ->setLeft(l); - typ->setRight(parseType(typ)); + if (types.size() > 1) { + typeDecl->setElements(types); } else { - typ->setValue(l); + typeDecl->setValue(types.at(0)); } match(Token::OPERATOR, Lexemes::GT); From 0aaa13ecb3935fab98492698a29a8fa361b66130 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 8 Aug 2024 21:35:58 -0400 Subject: [PATCH 09/38] almost there! --- src/compiler/CodeGen.cpp | 8 +++++++- src/compiler/TypeChecker.cpp | 6 ++++++ src/compiler/TypeChecker.hpp | 3 +++ 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index e584221..c1d1213 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -6,6 +6,7 @@ #include #include "binaryen-c.h" #include "compiler/Compiler.hpp" +#include "compiler/TypeChecker.hpp" #include "lexer/Lexemes.hpp" #include "StandardLibrary.hpp" #include "CodeGen.hpp" @@ -107,6 +108,8 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateAssignment(shared_ptr assignmentNode, BinaryenModuleRef &module) { string assignmentIdentifier = dynamic_pointer_cast(assignmentNode->getLeft())->getIdentifier(); + + cout << "generating assignment for " << assignmentIdentifier << endl; // Using a space in scope for an idx counter so we dont have to have a whole separate stack just to keep track of the current // local idx shared_ptr currentIdentIdx = dynamic_pointer_cast(scope.lookup(LOCAL_IDX_SCOPE_KEY)); @@ -362,7 +365,7 @@ namespace Theta { module, functionName.c_str(), parameterType, - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(fnDeclNode->getResolvedType()->getValue())), + getBinaryenTypeFromTypeDeclaration(TypeChecker::getFunctionReturnType(fnDeclNode)), localVariableTypes, localVariables.size(), generate(fnDeclNode->getDefinition(), module) @@ -401,6 +404,7 @@ namespace Theta { } BinaryenExpressionRef CodeGen::generateFunctionInvocation(shared_ptr funcInvNode, BinaryenModuleRef &module) { + cout << "generating function invocation!" << dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier() << endl; BinaryenExpressionRef* arguments = new BinaryenExpressionRef[funcInvNode->getParameters()->getElements().size()]; string funcName = Compiler::getQualifiedFunctionIdentifier( @@ -412,6 +416,8 @@ namespace Theta { arguments[i] = generate(funcInvNode->getParameters()->getElements().at(i), module); } + cout << "right before binaryen call" << endl; + return BinaryenCall( module, funcName.c_str(), diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index 6c97296..26fff16 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -897,4 +897,10 @@ namespace Theta { return foundInCapsule; } + + shared_ptr TypeChecker::getFunctionReturnType(shared_ptr fnDeclNode) { + if (fnDeclNode->getResolvedType()->getValue()) return dynamic_pointer_cast(fnDeclNode->getResolvedType()->getValue()); + + return dynamic_pointer_cast(dynamic_pointer_cast(fnDeclNode->getResolvedType())->getElements().back()); + } } diff --git a/src/compiler/TypeChecker.hpp b/src/compiler/TypeChecker.hpp index 763ca25..4fa3f28 100644 --- a/src/compiler/TypeChecker.hpp +++ b/src/compiler/TypeChecker.hpp @@ -61,6 +61,9 @@ namespace Theta { */ static bool isOneOfTypes(shared_ptr type, vector> options); + static shared_ptr getFunctionReturnType(shared_ptr fnDeclNode); + + private: SymbolTableStack identifierTable; SymbolTableStack capsuleDeclarationsTable; From 6b43ca93bc5de5d36e900ef8ef8703a72ba83208 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 8 Aug 2024 21:53:10 -0400 Subject: [PATCH 10/38] minor cleanup --- src/compiler/Compiler.cpp | 21 ++++++++++++++ src/compiler/Compiler.hpp | 10 +++++++ src/compiler/TypeChecker.cpp | 53 ++---------------------------------- src/compiler/TypeChecker.hpp | 20 +------------- 4 files changed, 35 insertions(+), 69 deletions(-) diff --git a/src/compiler/Compiler.cpp b/src/compiler/Compiler.cpp index b82b33f..5428aea 100644 --- a/src/compiler/Compiler.cpp +++ b/src/compiler/Compiler.cpp @@ -223,6 +223,8 @@ namespace Theta { if (node->getNodeType() == ASTNode::FUNCTION_DECLARATION) { shared_ptr declarationNode = dynamic_pointer_cast(node); params = declarationNode->getParameters()->getElements(); + } else if (node->getNodeType() == ASTNode::TYPE_DECLARATION) { + return getQualifiedFunctionIdentifierFromTypeSignature(variableName, dynamic_pointer_cast(node)); } else { shared_ptr invocationNode = dynamic_pointer_cast(node); params = invocationNode->getParameters()->getElements(); @@ -243,6 +245,25 @@ namespace Theta { return functionIdentifier; } + string Compiler::getQualifiedFunctionIdentifierFromTypeSignature(string variableName, shared_ptr typeSig) { + vector> params; + + // If typeSig has a value, that means the function takes in no parameters and only has a return value + if (typeSig->getValue() == nullptr) { + params.resize(typeSig->getElements().size() - 1); + copy(typeSig->getElements().begin(), typeSig->getElements().end() - 1, params.begin()); + } + + string functionIdentifier = variableName + to_string(params.size()); + + for (auto param : params) { + shared_ptr p = dynamic_pointer_cast(param); + functionIdentifier += p->getType(); + } + + return functionIdentifier; + } + vector> Compiler::findAllInTree(shared_ptr node, ASTNode::Types nodeType) { if (node->getNodeType() == nodeType) return { node }; diff --git a/src/compiler/Compiler.hpp b/src/compiler/Compiler.hpp index 3dd9750..4188475 100644 --- a/src/compiler/Compiler.hpp +++ b/src/compiler/Compiler.hpp @@ -17,6 +17,7 @@ #include "CodeGen.hpp" #include "compiler/optimization/OptimizationPass.hpp" #include "compiler/optimization/LiteralInlinerPass.hpp" +#include "parser/ast/TypeDeclarationNode.hpp" using namespace std; @@ -110,6 +111,15 @@ namespace Theta { * @return string The unique identifier for the function. */ static string getQualifiedFunctionIdentifier(string variableName, shared_ptr node); + + /** + * @brief Generates a unique function identifier based on the function's name and its type signature + * + * @param variableName The base name of the function. + * @param typeSig The type signature of the function. + * @return string The unique identifier for the function. + */ + static string getQualifiedFunctionIdentifierFromTypeSignature(string variableName, shared_ptr typeSig); /** * @brief Finds all AST nodes of a specific type within the tree rooted at a given node. diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index 26fff16..931228e 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -161,7 +161,7 @@ namespace Theta { // Function names can be overloaded, so functions don't need this check if (rhsType == DataTypes::FUNCTION) { - string uniqueFuncIdentifier = getDeterministicFunctionIdentifier( + string uniqueFuncIdentifier = Compiler::getQualifiedFunctionIdentifier( ident->getIdentifier(), (node->getRight()->getNodeType() == ASTNode::FUNCTION_DECLARATION ? node->getRight() @@ -343,7 +343,7 @@ namespace Theta { if (!validParams) return false; string funcIdentifier = dynamic_pointer_cast(node->getIdentifier())->getIdentifier(); - string uniqueFuncIdentifier = getDeterministicFunctionIdentifier(funcIdentifier, node); + string uniqueFuncIdentifier = Compiler::getQualifiedFunctionIdentifier(funcIdentifier, node); shared_ptr referencedFunction = lookupInScope(uniqueFuncIdentifier); @@ -654,7 +654,7 @@ namespace Theta { shared_ptr assignmentNode = dynamic_pointer_cast(node); shared_ptr ident = dynamic_pointer_cast(node->getLeft()); - string uniqueFuncIdentifier = getDeterministicFunctionIdentifier(ident->getIdentifier(), node->getRight()); + string uniqueFuncIdentifier = Compiler::getQualifiedFunctionIdentifier(ident->getIdentifier(), node->getRight()); shared_ptr existingFuncIdentifierInScope = capsuleDeclarationsTable.lookup(uniqueFuncIdentifier); @@ -841,53 +841,6 @@ namespace Theta { return variadicTypeNode; } - string TypeChecker::getDeterministicFunctionIdentifier(string variableName, shared_ptr node) { - vector> params; - - if (node->getNodeType() == ASTNode::FUNCTION_DECLARATION) { - shared_ptr declarationNode = dynamic_pointer_cast(node); - params = declarationNode->getParameters()->getElements(); - } else if (node->getNodeType() == ASTNode::TYPE_DECLARATION) { - return getDeterministicFunctionIdentifierFromTypeSignature(variableName, dynamic_pointer_cast(node)); - } else { - shared_ptr invocationNode = dynamic_pointer_cast(node); - params = invocationNode->getParameters()->getElements(); - } - - string functionIdentifier = variableName + to_string(params.size()); - - for (int i = 0; i < params.size(); i++) { - if (node->getNodeType() == ASTNode::FUNCTION_DECLARATION) { - shared_ptr paramType = dynamic_pointer_cast(params.at(i)->getValue()); - functionIdentifier += paramType->getType(); - } else { - shared_ptr paramType = dynamic_pointer_cast(params.at(i)->getResolvedType()); - functionIdentifier += paramType->getType(); - } - } - - return functionIdentifier; - } - - string TypeChecker::getDeterministicFunctionIdentifierFromTypeSignature(string variableName, shared_ptr typeSig) { - vector> params; - - // If typeSig has a value, that means the function takes in no parameters and only has a return value - if (typeSig->getValue() == nullptr) { - params.resize(typeSig->getElements().size() - 1); - copy(typeSig->getElements().begin(), typeSig->getElements().end() - 1, params.begin()); - } - - string functionIdentifier = variableName + to_string(params.size()); - - for (auto param : params) { - shared_ptr p = dynamic_pointer_cast(param); - functionIdentifier += p->getType(); - } - - return functionIdentifier; - } - shared_ptr TypeChecker::lookupInScope(string identifierName) { shared_ptr foundInCapsule = capsuleDeclarationsTable.lookup(identifierName); shared_ptr foundInLocalScope = identifierTable.lookup(identifierName); diff --git a/src/compiler/TypeChecker.hpp b/src/compiler/TypeChecker.hpp index 4fa3f28..9b3972a 100644 --- a/src/compiler/TypeChecker.hpp +++ b/src/compiler/TypeChecker.hpp @@ -276,24 +276,6 @@ namespace Theta { * @param types The vector of type declaration nodes. * @return shared_ptr The created variadic type node. */ - static shared_ptr makeVariadicType(vector> types, shared_ptr parent); - - /** - * @brief Generates a unique function identifier based on the function's name and its parameters to handle overloading. - * - * @param variableName The base name of the function. - * @param declarationNode The function declaration node containing the parameters. - * @return string The unique identifier for the function. - */ - static string getDeterministicFunctionIdentifier(string variableName, shared_ptr declarationNode); - - /** - * @brief Generates a unique function identifier based on the function's name and its type signature - * - * @param variableName The base name of the function. - * @param typeSig The type signature of the function. - * @return string The unique identifier for the function. - */ - static string getDeterministicFunctionIdentifierFromTypeSignature(string variableName, shared_ptr typeSig); + static shared_ptr makeVariadicType(vector> types, shared_ptr parent); }; } From ae563dd27fb8592f4cc70b2ca328bed34a77da7b Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 9 Aug 2024 20:01:34 -0400 Subject: [PATCH 11/38] correctly creating and storing closure arguments and function references --- src/compiler/CodeGen.cpp | 192 +++++++++++++++++++++++++++++++---- src/compiler/CodeGen.hpp | 11 +- src/compiler/WasmClosure.hpp | 31 ++++-- 3 files changed, 204 insertions(+), 30 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index c1d1213..6742400 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -1,12 +1,15 @@ #include +#include #include #include #include #include #include +#include #include "binaryen-c.h" #include "compiler/Compiler.hpp" #include "compiler/TypeChecker.hpp" +#include "compiler/WasmClosure.hpp" #include "lexer/Lexemes.hpp" #include "StandardLibrary.hpp" #include "CodeGen.hpp" @@ -21,21 +24,43 @@ namespace Theta { BinaryenModuleRef CodeGen::generateWasmFromAST(shared_ptr ast) { - BinaryenModuleRef module = BinaryenModuleCreate(); - - BinaryenModuleSetFeatures(module, BinaryenFeatureStrings()); - - StandardLibrary::registerFunctions(module); + BinaryenModuleRef module = initializeWasmModule(); generate(ast, module); registerModuleFunctions(module); - + + // Automatically adds drops to unused stack values BinaryenModuleAutoDrop(module); return module; } + BinaryenModuleRef CodeGen::initializeWasmModule() { + BinaryenModuleRef module = BinaryenModuleCreate(); + + BinaryenModuleSetFeatures(module, BinaryenFeatureStrings()); + BinaryenSetMemory( + module, + 1, // IMPORTANT: Memory size is dictated in pages, NOT bytes, where each page is 64k + 10, + "memory", + NULL, + NULL, + NULL, + NULL, + NULL, + 0, + false, + false, + MEMORY_NAME.c_str() + ); + + StandardLibrary::registerFunctions(module); + + return module; + } + BinaryenExpressionRef CodeGen::generate(shared_ptr node, BinaryenModuleRef &module) { if (node->hasOwnScope()) scope.enterScope(); @@ -109,7 +134,6 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateAssignment(shared_ptr assignmentNode, BinaryenModuleRef &module) { string assignmentIdentifier = dynamic_pointer_cast(assignmentNode->getLeft())->getIdentifier(); - cout << "generating assignment for " << assignmentIdentifier << endl; // Using a space in scope for an idx counter so we dont have to have a whole separate stack just to keep track of the current // local idx shared_ptr currentIdentIdx = dynamic_pointer_cast(scope.lookup(LOCAL_IDX_SCOPE_KEY)); @@ -127,7 +151,13 @@ namespace Theta { if (assignmentNode->getRight()->getNodeType() != ASTNode::FUNCTION_DECLARATION) { shared_ptr assignmentRhs = assignmentNode->getRight(); assignmentRhs->setMappedBinaryenIndex(idxOfAssignment); - scope.insert(assignmentIdentifier, assignmentRhs); + + string identName = assignmentIdentifier; + if (assignmentNode->getRight()->getNodeType() == ASTNode::FUNCTION_INVOCATION) { + identName = Compiler::getQualifiedFunctionIdentifier(identName, assignmentNode->getRight()->getResolvedType()); + } + + scope.insert(identName, assignmentRhs); return BinaryenLocalSet( module, @@ -154,7 +184,7 @@ namespace Theta { dynamic_pointer_cast(closure) ); - int functionIndex = functionNameToClosureMap.find(qualifiedClosureName)->second.getFunctionIndex(); + int functionIndex = functionNameToClosureTemplateMap.find(qualifiedClosureName)->second.getFunctionIndex(); closure->setMappedBinaryenIndex(idxOfAssignment); scope.insert(assignmentIdentifier, closure); @@ -371,10 +401,13 @@ namespace Theta { generate(fnDeclNode->getDefinition(), module) ); - functionNameToClosureMap.insert(make_pair( - functionName, - WasmClosure(functionNameToClosureMap.size(), totalParams) - )); + // Only add to the closure template map if its not already in there. It may have been added during hoisting + if (functionNameToClosureTemplateMap.find(functionName) == functionNameToClosureTemplateMap.end()) { + functionNameToClosureTemplateMap.insert(make_pair( + functionName, + WasmClosure(functionNameToClosureTemplateMap.size(), totalParams) + )); + } if (addToExports) { BinaryenAddFunctionExport(module, functionName.c_str(), functionName.c_str()); @@ -404,7 +437,6 @@ namespace Theta { } BinaryenExpressionRef CodeGen::generateFunctionInvocation(shared_ptr funcInvNode, BinaryenModuleRef &module) { - cout << "generating function invocation!" << dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier() << endl; BinaryenExpressionRef* arguments = new BinaryenExpressionRef[funcInvNode->getParameters()->getElements().size()]; string funcName = Compiler::getQualifiedFunctionIdentifier( @@ -416,7 +448,14 @@ namespace Theta { arguments[i] = generate(funcInvNode->getParameters()->getElements().at(i), module); } - cout << "right before binaryen call" << endl; + shared_ptr foundLocalReference = scope.lookup(funcName); + + if (foundLocalReference) { + return generateIndirectInvocation(funcInvNode, foundLocalReference, module); + } + + // TODO: Check if this needs to be an indirect call, and generate that instead of a normal call. Thats why the current compile + // is failing return BinaryenCall( module, @@ -427,6 +466,70 @@ namespace Theta { ); } + BinaryenExpressionRef CodeGen::generateIndirectInvocation(shared_ptr funcInvNode, shared_ptr reference, BinaryenModuleRef &module) { + if (reference->getNodeType() == ASTNode::FUNCTION_DECLARATION) { + shared_ptr ref = dynamic_pointer_cast(reference); + string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); + + string refIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); + cout << "Looking for closure template: " << refIdentifier << endl; + + WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; + + WasmClosure closure = WasmClosure::clone(closureTemplate); + + vector expressions; + + vector paramMemPointers; + + for (auto arg : funcInvNode->getParameters()->getElements()) { + int byteSize = calculateLiteralByteSize(arg); + int memLocation = memoryOffset; + + paramMemPointers.push_back(memLocation); + + memoryOffset += byteSize; + + expressions.push_back( + BinaryenStore( + module, + byteSize, + 0, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memLocation)), + generate(arg, module), + getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), + MEMORY_NAME.c_str() + ) + ); + } + + closure.addArgs(paramMemPointers); + + pair> storage = generateClosureMemoryStore(closure, module); + + vector temp = storage.second; + + copy(temp.begin(), temp.end(), back_inserter(expressions)); + + // TODO: replace with call_indirect + expressions.push_back(BinaryenConst(module, BinaryenLiteralInt64(1))); + + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; + for (int i = 0; i < expressions.size(); i++) { + blockExpressions[i] = expressions.at(i); + } + + return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeInt64()); + } + + shared_ptr ref = dynamic_pointer_cast(reference); + string refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); + + functionNameToClosureTemplateMap.find(Compiler::getQualifiedFunctionIdentifier(refIdentifier, reference)); + + } + BinaryenExpressionRef CodeGen::generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module) { controlFlowNode->getConditionExpressionPairs(); @@ -613,6 +716,37 @@ namespace Theta { } } + pair> CodeGen::generateClosureMemoryStore(WasmClosure closure, BinaryenModuleRef &module) { + // At least 4 bytes for the fn_idx and 4 bytes for the arity. Then 4 bytes for each parameter the closure takes. + // We also multiply the remaining arity, since not all parameters may have been applied to the function + int totalMemSize = 8 + (closure.getArgPointers().size() * 4) + (closure.getArity() * 4); + int memLocation = memoryOffset; + + vector closureDataSegments = { closure.getFunctionIndex(), closure.getArity() }; + for (int i = 0; i < closure.getArgPointers().size(); i++) { + closureDataSegments.push_back(closure.getArgPointers().at(i)); + } + + vector expressions; + + for (int i = 0; i < closureDataSegments.size(); i++) { + expressions.push_back( + BinaryenStore( + module, + 4, + i * 4, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memLocation)), + BinaryenConst(module, BinaryenLiteralInt32(closureDataSegments.at(i))), + BinaryenTypeInt32(), + MEMORY_NAME.c_str() + ) + ); + } + + return make_pair(memLocation, expressions); + } + BinaryenOp CodeGen::getBinaryenOpFromBinOpNode(shared_ptr binOpNode) { string op = binOpNode->getOperator(); @@ -652,6 +786,13 @@ namespace Theta { if (ast->getRight()->getNodeType() == ASTNode::FUNCTION_DECLARATION) { identifier = Compiler::getQualifiedFunctionIdentifier(identifier, ast->getRight()); + + int totalParams = dynamic_pointer_cast(ast->getRight())->getParameters()->getElements().size(); + + functionNameToClosureTemplateMap.insert(make_pair( + identifier, + WasmClosure(functionNameToClosureTemplateMap.size(), totalParams) + )); } scope.insert(identifier, ast->getRight()); @@ -661,14 +802,14 @@ namespace Theta { BinaryenAddTable( module, FN_TABLE_NAME.c_str(), - functionNameToClosureMap.size(), - functionNameToClosureMap.size(), + functionNameToClosureTemplateMap.size(), + functionNameToClosureTemplateMap.size(), BinaryenTypeFuncref() ); - const char** fnNames = new const char*[functionNameToClosureMap.size()]; + const char** fnNames = new const char*[functionNameToClosureTemplateMap.size()]; - for (auto& [fnName, fnRef] : functionNameToClosureMap) { + for (auto& [fnName, fnRef] : functionNameToClosureTemplateMap) { fnNames[fnRef.getFunctionIndex()] = fnName.c_str(); } @@ -677,8 +818,19 @@ namespace Theta { FN_TABLE_NAME.c_str(), "0", fnNames, - functionNameToClosureMap.size(), + functionNameToClosureTemplateMap.size(), BinaryenConst(module, BinaryenLiteralInt32(0)) ); } + + int CodeGen::calculateLiteralByteSize(shared_ptr literal) { + if (literal->getNodeType() == ASTNode::BOOLEAN_LITERAL) return 4; + if (literal->getNodeType() == ASTNode::NUMBER_LITERAL) return 8; + if (literal->getNodeType() == ASTNode::STRING_LITERAL) { + cout << "WARNING! String byte size count has not been implemented." << endl; + return 100; + } + + throw new runtime_error("No cant calculate byte size for non-literal"); + } } diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 264668c..b096c47 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -35,6 +35,7 @@ namespace Theta { BinaryenExpressionRef generateFunctionDeclaration(string identifier, shared_ptr node, BinaryenModuleRef &module, bool addToExports = false); shared_ptr generateClosure(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateFunctionInvocation(shared_ptr node, BinaryenModuleRef &module); + BinaryenExpressionRef generateIndirectInvocation(shared_ptr node, shared_ptr reference, BinaryenModuleRef &module); BinaryenExpressionRef generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module); BinaryenExpressionRef generateIdentifier(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateBinaryOperation(shared_ptr node, BinaryenModuleRef &module); @@ -48,10 +49,14 @@ namespace Theta { private: SymbolTableStack scope; string FN_TABLE_NAME = "0"; - unordered_map functionNameToClosureMap; + string MEMORY_NAME = "0"; + int memoryOffset = 0; + unordered_map functionNameToClosureTemplateMap; string LOCAL_IDX_SCOPE_KEY = "ThetaLang.internal.localIdxCounter"; string BOOTSTRAP_FUNC_NAME = "ThetaLang.bootstrap"; + BinaryenModuleRef initializeWasmModule(); + BinaryenExpressionRef generateStringBinaryOperation(string op, BinaryenExpressionRef left, BinaryenExpressionRef right, BinaryenModuleRef &module); static BinaryenOp getBinaryenOpFromBinOpNode(shared_ptr node); @@ -61,8 +66,12 @@ namespace Theta { void bindIdentifierToScope(shared_ptr ast); void registerModuleFunctions(BinaryenModuleRef &module); + pair> generateClosureMemoryStore(WasmClosure closure, BinaryenModuleRef &module); + void collectClosureScope(shared_ptr node, set &identifiersToFind, vector> ¶meters, vector> &bodyExpressions); string generateFunctionHash(shared_ptr function); + + int calculateLiteralByteSize(shared_ptr literal); }; } diff --git a/src/compiler/WasmClosure.hpp b/src/compiler/WasmClosure.hpp index 090154b..4d90344 100644 --- a/src/compiler/WasmClosure.hpp +++ b/src/compiler/WasmClosure.hpp @@ -1,30 +1,43 @@ #pragma once +#include + +using namespace std; + namespace Theta { class WasmClosure { public: - WasmClosure(int tableIndex, int arity) { + WasmClosure(int tableIndex, int initialArity) { idx = tableIndex; - arity = arity; - argPointers = new int[arity]; + arity = initialArity; + + argPointers.resize(arity); } int getFunctionIndex() { return idx; } int getArity() { return arity; } - int* getArgPointers() { return argPointers; } + vector getArgPointers() { return argPointers; } + + void addArgs(vector argPtrs) { + for (int argPtr : argPtrs) { + argPointers[arity - 1] = argPtr; + arity--; + } + } - void addArg(int argPtr) { - argPointers[arity] = argPtr; - currentArgs++; + static WasmClosure clone(WasmClosure toClone) { + return WasmClosure( + toClone.getFunctionIndex(), + toClone.arity + ); } private: int idx; int arity; - int* argPointers; - int currentArgs = 0; + vector argPointers; }; } From a340019ed957dd29c1c2cf04cef9209487d87bce Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 10 Aug 2024 17:01:55 -0400 Subject: [PATCH 12/38] read from base module to establish core lang features --- CMakeLists.txt | 10 ++++++ src/compiler/CodeGen.cpp | 66 ++++++++++++++++++++++++++++++++++++-- src/compiler/CodeGen.hpp | 4 +++ src/wasm/ThetaLangCore.wat | 5 +++ 4 files changed, 82 insertions(+), 3 deletions(-) create mode 100644 src/wasm/ThetaLangCore.wat diff --git a/CMakeLists.txt b/CMakeLists.txt index d6df462..6eb274f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,16 @@ file(GLOB TEST_SRC_FILES "${TEST_DIR}/*.cpp") # Add executable for the main program add_executable(theta ${SRC_FILES} ${MAIN_SRC}) +# Add a custom command to copy the WAT file +set(WAT_FILE_SRC "${CMAKE_SOURCE_DIR}/src/wasm/ThetaLangCore.wat") +set(WAT_FILE_DEST "${CMAKE_BINARY_DIR}/wasm") +add_custom_command( + TARGET theta POST_BUILD + COMMAND ${CMAKE_COMMAND} -E make_directory ${WAT_FILE_DEST} + COMMAND ${CMAKE_COMMAND} -E copy_if_different ${WAT_FILE_SRC} ${WAT_FILE_DEST} + COMMENT "Copying WAT file to build directory" +) + # Add the readline library if (WIN32) # Add the readline library diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 6742400..b57e7c1 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -1,4 +1,7 @@ #include +#include +#include +#include #include #include #include @@ -22,6 +25,10 @@ #include "parser/ast/TypeDeclarationNode.hpp" #include "cli/CLI.cpp" +#ifdef __APPLE__ +#include +#endif + namespace Theta { BinaryenModuleRef CodeGen::generateWasmFromAST(shared_ptr ast) { BinaryenModuleRef module = initializeWasmModule(); @@ -37,7 +44,7 @@ namespace Theta { } BinaryenModuleRef CodeGen::initializeWasmModule() { - BinaryenModuleRef module = BinaryenModuleCreate(); + BinaryenModuleRef module = importCoreLangWasm(); BinaryenModuleSetFeatures(module, BinaryenFeatureStrings()); BinaryenSetMemory( @@ -510,10 +517,10 @@ namespace Theta { vector temp = storage.second; - copy(temp.begin(), temp.end(), back_inserter(expressions)); + copy(storage.second.begin(), storage.second.end(), back_inserter(expressions)); // TODO: replace with call_indirect - expressions.push_back(BinaryenConst(module, BinaryenLiteralInt64(1))); + expressions.push_back(BinaryenConst(module, BinaryenLiteralInt64(storage.first))); BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; for (int i = 0; i < expressions.size(); i++) { @@ -833,4 +840,57 @@ namespace Theta { throw new runtime_error("No cant calculate byte size for non-literal"); } + + BinaryenModuleRef CodeGen::importCoreLangWasm() { + ifstream file(resolveAbsolutePath("wasm/ThetaLangCore.wat"), ios::binary); + if (!file.is_open()) { + cerr << "Failed to open the file." << endl; + return nullptr; + } + + vector buffer(istreambuf_iterator(file), {}); + + if (buffer.empty()) { + cerr << "Failed to read the file or the file is empty." << endl; + return nullptr; + } + + file.close(); + + return BinaryenModuleParse(buffer.data()); + } + + string CodeGen::resolveAbsolutePath(string relativePath) { + char path[PATH_MAX]; + + #ifdef __APPLE__ + uint32_t size = sizeof(path); + if (_NSGetExecutablePath(path, &size) != 0) { + cerr << "Buffer too small; should be resized to " << size << " bytes\n" << endl; + return ""; + } + #else + ssize_t count = readlink("/proc/self/exe", path, PATH_MAX); + if (count <= 0) { + cerr << "Failed to read the path of the executable." << endl; + return ""; + } + path[count] = '\0'; // Ensure null termination + #endif + + char realPath[PATH_MAX]; + if (realpath(path, realPath) == NULL) { + cerr << "Error resolving symlink for " << path << endl; + return ""; + } + + string exePath = string(realPath); + if (exePath.empty()) return ""; + + char *pathCStr = strdup(exePath.c_str()); + string dirPath = dirname(pathCStr); // Use dirname to get the directory part + free(pathCStr); // Free the duplicated string + + return dirPath + "/" + relativePath; + } } diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index b096c47..a228a98 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -73,5 +73,9 @@ namespace Theta { string generateFunctionHash(shared_ptr function); int calculateLiteralByteSize(shared_ptr literal); + + BinaryenModuleRef importCoreLangWasm(); + + string resolveAbsolutePath(string relativePath); }; } diff --git a/src/wasm/ThetaLangCore.wat b/src/wasm/ThetaLangCore.wat new file mode 100644 index 0000000..a1be4ae --- /dev/null +++ b/src/wasm/ThetaLangCore.wat @@ -0,0 +1,5 @@ +(module + (func $Theta.Function.executeIndirect (result i32) + i32.const 2 + ) +) From 419dad60be52641814aedf72a5912efe63d81e4b Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 10 Aug 2024 21:56:59 -0400 Subject: [PATCH 13/38] fix module loading from wat, get indirect function calls working in a very basic state --- CMakeLists.txt | 6 ++-- src/compiler/CodeGen.cpp | 64 ++++++++++++++++++++++++++++++++++++-- src/compiler/CodeGen.hpp | 1 + src/wasm/ThetaLangCore.wat | 21 +++++++++++-- 4 files changed, 84 insertions(+), 8 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6eb274f..231320b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -41,12 +41,12 @@ add_executable(theta ${SRC_FILES} ${MAIN_SRC}) # Add a custom command to copy the WAT file set(WAT_FILE_SRC "${CMAKE_SOURCE_DIR}/src/wasm/ThetaLangCore.wat") set(WAT_FILE_DEST "${CMAKE_BINARY_DIR}/wasm") -add_custom_command( - TARGET theta POST_BUILD +add_custom_target(copy_wat ALL COMMAND ${CMAKE_COMMAND} -E make_directory ${WAT_FILE_DEST} - COMMAND ${CMAKE_COMMAND} -E copy_if_different ${WAT_FILE_SRC} ${WAT_FILE_DEST} + COMMAND ${CMAKE_COMMAND} -E copy ${WAT_FILE_SRC} ${WAT_FILE_DEST} COMMENT "Copying WAT file to build directory" ) +add_dependencies(copy_wat theta) # Add the readline library if (WIN32) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index b57e7c1..cd5fd53 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -489,6 +489,8 @@ namespace Theta { vector paramMemPointers; + // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory + // That way, we save a bunch of store and load calls, and can just skip to the call_indirect for (auto arg : funcInvNode->getParameters()->getElements()) { int byteSize = calculateLiteralByteSize(arg); int memLocation = memoryOffset; @@ -519,9 +521,43 @@ namespace Theta { copy(storage.second.begin(), storage.second.end(), back_inserter(expressions)); - // TODO: replace with call_indirect - expressions.push_back(BinaryenConst(module, BinaryenLiteralInt64(storage.first))); + // If we're at 0 arity we can go ahead and execute the function call + if (closure.getArity() == 0) { + BinaryenExpressionRef* operands = new BinaryenExpressionRef[closure.getArgPointers().size()]; + + for (int i = 0; i < closure.getArgPointers().size(); i++) { + shared_ptr arg = funcInvNode->getParameters()->getElements().at(i); + operands[i] = BinaryenLoad( + module, + calculateLiteralByteSize(arg), + false, // TODO: Support signed values! + 0, + 0, + getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), // TODO: fix the hardcoded stuff here + BinaryenConst(module, BinaryenLiteralInt32(closure.getArgPointers().at(i))), + MEMORY_NAME.c_str() + ); + } + + pair fnTypes = getBinaryenTypeForFunctionDeclaration(ref); + + expressions.push_back( + BinaryenCallIndirect( + module, + FN_TABLE_NAME.c_str(), + BinaryenConst(module, BinaryenLiteralInt32(closure.getFunctionIndex())), + operands, + closure.getArgPointers().size(), + fnTypes.first, + fnTypes.second + ) + ); + } else { + // Otherwise we just return a pointer to the function, for later use + expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(storage.first))); + } + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; for (int i = 0; i < expressions.size(); i++) { blockExpressions[i] = expressions.at(i); @@ -782,6 +818,23 @@ namespace Theta { if (typeDeclaration->getType() == DataTypes::FUNCTION) return BinaryenTypeInt32(); } + pair CodeGen::getBinaryenTypeForFunctionDeclaration(shared_ptr function) { + int totalParams = function->getParameters()->getElements().size(); + + BinaryenType* types = new BinaryenType[totalParams]; + for (int i = 0; i < totalParams; i++) { + types[i] = getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(function->getParameters()->getElements().at(i)->getValue())); + } + + BinaryenType paramType = BinaryenTypeCreate(types, totalParams); + + BinaryenType returnType = getBinaryenTypeFromTypeDeclaration( + dynamic_pointer_cast(TypeChecker::getFunctionReturnType(function)) + ); + + return make_pair(paramType, returnType); + } + void CodeGen::hoistCapsuleElements(vector> elements) { scope.enterScope(); @@ -855,9 +908,14 @@ namespace Theta { return nullptr; } + // Add a null terminator at the end + buffer.push_back('\0'); + + BinaryenModuleRef module = BinaryenModuleParse(buffer.data()); + file.close(); - return BinaryenModuleParse(buffer.data()); + return module; } string CodeGen::resolveAbsolutePath(string relativePath) { diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index a228a98..4686a35 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -61,6 +61,7 @@ namespace Theta { static BinaryenOp getBinaryenOpFromBinOpNode(shared_ptr node); static BinaryenType getBinaryenTypeFromTypeDeclaration(shared_ptr node); + static pair getBinaryenTypeForFunctionDeclaration(shared_ptr node); void hoistCapsuleElements(vector> elements); void bindIdentifierToScope(shared_ptr ast); diff --git a/src/wasm/ThetaLangCore.wat b/src/wasm/ThetaLangCore.wat index a1be4ae..ac0032a 100644 --- a/src/wasm/ThetaLangCore.wat +++ b/src/wasm/ThetaLangCore.wat @@ -1,5 +1,22 @@ (module - (func $Theta.Function.executeIndirect (result i32) - i32.const 2 + (memory $0 1 10) + (func $Theta.Function.executeIndirect (param $fn_idx i32) (result i32) (local $arity i32) + (local.set $arity + (i32.load + (i32.add + (local.get $fn_idx) + (i32.const 4) + ) + ) + ) + (if (result i32) + (i32.eqz (local.get $arity)) + (then + (i32.const 1) + ) + (else + (local.get $fn_idx) + ) + ) ) ) From 9d898e44b840f31da4de71fdd346f217fabec465 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 11 Aug 2024 13:38:30 -0400 Subject: [PATCH 14/38] chore: clean up codegen a bit --- src/compiler/CodeGen.cpp | 143 +++++++++++++++++++++------------------ src/compiler/CodeGen.hpp | 33 +++++++-- 2 files changed, 104 insertions(+), 72 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index cd5fd53..48ceada 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -51,7 +51,7 @@ namespace Theta { module, 1, // IMPORTANT: Memory size is dictated in pages, NOT bytes, where each page is 64k 10, - "memory", + "memory", // TODO: We don't actually want to export this -- just for now NULL, NULL, NULL, @@ -82,7 +82,9 @@ namespace Theta { } else if (node->getNodeType() == ASTNode::RETURN) { return generateReturn(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::FUNCTION_DECLARATION) { - generateClosure(dynamic_pointer_cast(node), module); + // The only time we should get here is if we have a function defined inside a function, + // because the normal function declaration flow goes through the generateAssignment flow + simplifyNestedFunctionDeclaration(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::FUNCTION_INVOCATION) { return generateFunctionInvocation(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::CONTROL_FLOW) { @@ -173,28 +175,30 @@ namespace Theta { ); } - shared_ptr closure = generateClosure( + shared_ptr simplifiedDeclaration = simplifyNestedFunctionDeclaration( dynamic_pointer_cast(assignmentNode->getRight()), module ); - string closureName = generateFunctionHash(closure); + string simplifiedDeclarationName = generateFunctionHash(simplifiedDeclaration); generateFunctionDeclaration( - closureName, - closure, + simplifiedDeclarationName, + simplifiedDeclaration, module ); - string qualifiedClosureName = Compiler::getQualifiedFunctionIdentifier( - closureName, - dynamic_pointer_cast(closure) + string qualifiedFunctionName = Compiler::getQualifiedFunctionIdentifier( + simplifiedDeclarationName, + dynamic_pointer_cast(simplifiedDeclaration) ); - int functionIndex = functionNameToClosureTemplateMap.find(qualifiedClosureName)->second.getFunctionIndex(); + int functionIndex = functionNameToClosureTemplateMap.find(qualifiedFunctionName)->second.getFunctionIndex(); - closure->setMappedBinaryenIndex(idxOfAssignment); - scope.insert(assignmentIdentifier, closure); + simplifiedDeclaration->setMappedBinaryenIndex(idxOfAssignment); + + // Assign it in scope to the lhs identifier so we can always look it up later when it is referenced + scope.insert(assignmentIdentifier, simplifiedDeclaration); return BinaryenLocalSet( module, @@ -207,7 +211,10 @@ namespace Theta { } // Transforms nested function declarations and generates an anonymous function in the function table - shared_ptr CodeGen::generateClosure(shared_ptr fnDeclNode, BinaryenModuleRef &module) { + shared_ptr CodeGen::simplifyNestedFunctionDeclaration( + shared_ptr fnDeclNode, + BinaryenModuleRef &module + ) { // Capture the outer scope set requiredScopeIdentifiers; set paramIdentifiers; @@ -231,76 +238,63 @@ namespace Theta { requiredScopeIdentifiers.insert(identifierName); } - shared_ptr closure = make_shared(nullptr); - closure->setResolvedType(Compiler::deepCopyTypeDeclaration( - dynamic_pointer_cast(fnDeclNode->getResolvedType()), - closure - )); - - - vector> closureParameters; - vector> closureExpressions; + vector> simplifiedDeclarationParameters; + vector> simplifiedDeclarationExpressions; - collectClosureScope(fnDeclNode, requiredScopeIdentifiers, closureParameters, closureExpressions); + collectClosureScope(fnDeclNode, requiredScopeIdentifiers, simplifiedDeclarationParameters, simplifiedDeclarationExpressions); // The collectClosureScope function will collect the parameters in reverse order, in order to preserve the order // in which the params are passed throughout the ancestry path, so we need to reverse it back here to get the // correct order - reverse(closureParameters.begin(), closureParameters.end()); - reverse(closureExpressions.begin(), closureExpressions.end()); + reverse(simplifiedDeclarationParameters.begin(), simplifiedDeclarationParameters.end()); + reverse(simplifiedDeclarationExpressions.begin(), simplifiedDeclarationExpressions.end()); - closureParameters.insert( - closureParameters.end(), + // Add the most immediate-level function's parameters to the end + simplifiedDeclarationParameters.insert( + simplifiedDeclarationParameters.end(), fnDeclNode->getParameters()->getElements().begin(), fnDeclNode->getParameters()->getElements().end() ); - vector> originalFnExpressions = dynamic_pointer_cast(fnDeclNode->getDefinition())->getElements(); + vector> originalFnExpressions = + dynamic_pointer_cast(fnDeclNode->getDefinition())->getElements(); - closureExpressions.insert( - closureExpressions.end(), + simplifiedDeclarationExpressions.insert( + simplifiedDeclarationExpressions.end(), originalFnExpressions.begin(), originalFnExpressions.end() ); + + shared_ptr simplifiedDeclaration = make_shared(nullptr); + simplifiedDeclaration->setResolvedType(Compiler::deepCopyTypeDeclaration( + dynamic_pointer_cast(fnDeclNode->getResolvedType()), + simplifiedDeclaration + )); - shared_ptr parametersNode = make_shared(closure); - parametersNode->setElements(closureParameters); - closure->setParameters(parametersNode); + shared_ptr parametersNode = make_shared(simplifiedDeclaration); + parametersNode->setElements(simplifiedDeclarationParameters); + simplifiedDeclaration->setParameters(parametersNode); - shared_ptr closureBody = make_shared(closure); - closureBody->setElements(closureExpressions); - closure->setDefinition(closureBody); + shared_ptr simplifiedDeclarationBody = make_shared(simplifiedDeclaration); + simplifiedDeclarationBody->setElements(simplifiedDeclarationExpressions); + simplifiedDeclaration->setDefinition(simplifiedDeclarationBody); // If we've traversed the tree for parameters and we still have some missing identifiers, they must be defined in bodies if (requiredScopeIdentifiers.size() > 0) { - cout << "\033[1;31mFATAL ERROR: Could not locate necessary closure identifiers!\033[0m" << endl; - cout << " Missed identifiers: "; + cerr << "\033[1;31mFATAL ERROR: Could not locate necessary closure identifiers!\033[0m" << endl; + cerr << " Missed identifiers: "; for (int i = 0; i < requiredScopeIdentifiers.size(); i++) { - if (i > 0) cout << ", "; - cout << i; + if (i > 0) cerr << ", "; + cerr << i; } - cout << endl << "This error is not caused by your code, but rather an issue with the compiler itself. Please report an issue at " << CLI::makeLink("https://github.com/alexdovzhanyn/ThetaLang/issues"); + cerr << endl << "This error is not caused by your code, but rather an issue with the compiler itself. Please report an issue at " << CLI::makeLink("https://github.com/alexdovzhanyn/ThetaLang/issues"); exit(1); } - return closure; + return simplifiedDeclaration; } - - string CodeGen::generateFunctionHash(shared_ptr function) { - hash hasher; - - size_t hashed = hasher(function->toJSON()); - - ostringstream stream; - - stream << hex << nouppercase << setw(sizeof(size_t) * 2) << setfill('0'); - - stream << hashed; - - return stream.str(); - } - + void CodeGen::collectClosureScope( shared_ptr node, set &identifiersToFind, @@ -473,7 +467,12 @@ namespace Theta { ); } - BinaryenExpressionRef CodeGen::generateIndirectInvocation(shared_ptr funcInvNode, shared_ptr reference, BinaryenModuleRef &module) { + // TODO: This needs to be refactored + BinaryenExpressionRef CodeGen::generateIndirectInvocation( + shared_ptr funcInvNode, + shared_ptr reference, + BinaryenModuleRef &module + ) { if (reference->getNodeType() == ASTNode::FUNCTION_DECLARATION) { shared_ptr ref = dynamic_pointer_cast(reference); string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); @@ -482,11 +481,9 @@ namespace Theta { cout << "Looking for closure template: " << refIdentifier << endl; WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; - WasmClosure closure = WasmClosure::clone(closureTemplate); vector expressions; - vector paramMemPointers; // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory @@ -499,6 +496,7 @@ namespace Theta { memoryOffset += byteSize; + // Store each passed argument into memory expressions.push_back( BinaryenStore( module, @@ -515,11 +513,9 @@ namespace Theta { closure.addArgs(paramMemPointers); - pair> storage = generateClosureMemoryStore(closure, module); - - vector temp = storage.second; + auto [closurePointer, storageExpressions] = generateClosureMemoryStore(closure, module); - copy(storage.second.begin(), storage.second.end(), back_inserter(expressions)); + copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); // If we're at 0 arity we can go ahead and execute the function call if (closure.getArity() == 0) { @@ -554,8 +550,7 @@ namespace Theta { ) ); } else { - // Otherwise we just return a pointer to the function, for later use - expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(storage.first))); + expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(closurePointer))); } BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; @@ -566,11 +561,11 @@ namespace Theta { return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeInt64()); } + // TODO: Implement shared_ptr ref = dynamic_pointer_cast(reference); string refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); functionNameToClosureTemplateMap.find(Compiler::getQualifiedFunctionIdentifier(refIdentifier, reference)); - } BinaryenExpressionRef CodeGen::generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module) { @@ -951,4 +946,18 @@ namespace Theta { return dirPath + "/" + relativePath; } + + string CodeGen::generateFunctionHash(shared_ptr function) { + hash hasher; + + size_t hashed = hasher(function->toJSON()); + + ostringstream stream; + + stream << hex << nouppercase << setw(sizeof(size_t) * 2) << setfill('0'); + + stream << hashed; + + return stream.str(); + } } diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 4686a35..2087414 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -32,10 +32,18 @@ namespace Theta { BinaryenExpressionRef generateAssignment(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateBlock(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateReturn(shared_ptr node, BinaryenModuleRef &module); - BinaryenExpressionRef generateFunctionDeclaration(string identifier, shared_ptr node, BinaryenModuleRef &module, bool addToExports = false); - shared_ptr generateClosure(shared_ptr node, BinaryenModuleRef &module); + BinaryenExpressionRef generateFunctionDeclaration( + string identifier, + shared_ptr node, + BinaryenModuleRef &module, + bool addToExports = false + ); BinaryenExpressionRef generateFunctionInvocation(shared_ptr node, BinaryenModuleRef &module); - BinaryenExpressionRef generateIndirectInvocation(shared_ptr node, shared_ptr reference, BinaryenModuleRef &module); + BinaryenExpressionRef generateIndirectInvocation( + shared_ptr node, + shared_ptr reference, + BinaryenModuleRef &module + ); BinaryenExpressionRef generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module); BinaryenExpressionRef generateIdentifier(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateBinaryOperation(shared_ptr node, BinaryenModuleRef &module); @@ -46,6 +54,11 @@ namespace Theta { BinaryenExpressionRef generateExponentOperation(shared_ptr node, BinaryenModuleRef &module); void generateSource(shared_ptr node, BinaryenModuleRef &module); + shared_ptr simplifyNestedFunctionDeclaration( + shared_ptr node, + BinaryenModuleRef &module + ); + private: SymbolTableStack scope; string FN_TABLE_NAME = "0"; @@ -57,7 +70,12 @@ namespace Theta { BinaryenModuleRef initializeWasmModule(); - BinaryenExpressionRef generateStringBinaryOperation(string op, BinaryenExpressionRef left, BinaryenExpressionRef right, BinaryenModuleRef &module); + BinaryenExpressionRef generateStringBinaryOperation( + string op, + BinaryenExpressionRef left, + BinaryenExpressionRef right, + BinaryenModuleRef &modul + ); static BinaryenOp getBinaryenOpFromBinOpNode(shared_ptr node); static BinaryenType getBinaryenTypeFromTypeDeclaration(shared_ptr node); @@ -69,7 +87,12 @@ namespace Theta { pair> generateClosureMemoryStore(WasmClosure closure, BinaryenModuleRef &module); - void collectClosureScope(shared_ptr node, set &identifiersToFind, vector> ¶meters, vector> &bodyExpressions); + void collectClosureScope( + shared_ptr node, + set &identifiersToFind, + vector> ¶meters, + vector> &bodyExpression + ); string generateFunctionHash(shared_ptr function); From 9dfa5a6cca98bbdc36de16663b9723f5081851f3 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 12 Aug 2024 21:18:43 -0400 Subject: [PATCH 15/38] better error message when function reference cant be found --- src/compiler/CodeGen.cpp | 14 +++++++++++--- src/compiler/Compiler.cpp | 5 +++-- src/compiler/TypeChecker.cpp | 13 +++++++++++-- 3 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 48ceada..6c35408 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -168,10 +168,14 @@ namespace Theta { scope.insert(identName, assignmentRhs); + BinaryenExpressionRef generated = generate(assignmentRhs, module); + cout << "here after generate: " << assignmentRhs->toJSON() << endl; + BinaryenExpressionPrint(generated); + return BinaryenLocalSet( module, idxOfAssignment, - generate(assignmentRhs, module) + generated ); } @@ -402,6 +406,9 @@ namespace Theta { generate(fnDeclNode->getDefinition(), module) ); + cout << "teehee" << endl; + BinaryenModulePrint(module); + // Only add to the closure template map if its not already in there. It may have been added during hoisting if (functionNameToClosureTemplateMap.find(functionName) == functionNameToClosureTemplateMap.end()) { functionNameToClosureTemplateMap.insert(make_pair( @@ -552,16 +559,17 @@ namespace Theta { } else { expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(closurePointer))); } - + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; for (int i = 0; i < expressions.size(); i++) { blockExpressions[i] = expressions.at(i); } - + return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeInt64()); } // TODO: Implement + cout << "AHA! im here!" << endl; shared_ptr ref = dynamic_pointer_cast(reference); string refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); diff --git a/src/compiler/Compiler.cpp b/src/compiler/Compiler.cpp index 5428aea..f2a3e23 100644 --- a/src/compiler/Compiler.cpp +++ b/src/compiler/Compiler.cpp @@ -37,6 +37,7 @@ namespace Theta { if (isEmitWAT) { cout << "Generated WAT for \"" + entrypoint + "\":" << endl; BinaryenModulePrint(module); + cout << "done" << endl; } writeModuleToFile(module, outputFile); @@ -248,8 +249,8 @@ namespace Theta { string Compiler::getQualifiedFunctionIdentifierFromTypeSignature(string variableName, shared_ptr typeSig) { vector> params; - // If typeSig has a value, that means the function takes in no parameters and only has a return value - if (typeSig->getValue() == nullptr) { + // If typeSig is a function, and it has a value, that means the function takes in no parameters and only has a return value + if (typeSig->getType() == DataTypes::FUNCTION && typeSig->getValue() == nullptr) { params.resize(typeSig->getElements().size() - 1); copy(typeSig->getElements().begin(), typeSig->getElements().end() - 1, params.begin()); } diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index 931228e..88fc5a5 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -348,9 +348,18 @@ namespace Theta { shared_ptr referencedFunction = lookupInScope(uniqueFuncIdentifier); if (!referencedFunction) { - cout << "shleem: " << uniqueFuncIdentifier << endl; + string paramTypes = "("; - Compiler::getInstance().addException(make_shared(funcIdentifier)); + for (int i = 0; i < node->getParameters()->getElements().size(); i++) { + if (i > 0) paramTypes += ", "; + paramTypes += dynamic_pointer_cast( + node->getParameters()->getElements().at(i)->getResolvedType() + )->toString(); + } + + paramTypes += ")"; + + Compiler::getInstance().addException(make_shared(funcIdentifier + paramTypes)); return false; } From 208340a909affb47b1014944360729bbffa79ccd Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 13 Aug 2024 20:47:17 -0400 Subject: [PATCH 16/38] needs lots of cleanup but getting closer --- src/compiler/CodeGen.cpp | 162 +++++++++++++++++++++++++++++++---- src/compiler/CodeGen.hpp | 9 +- src/compiler/Pointer.hpp | 23 +++++ src/compiler/WasmClosure.hpp | 56 +++++++++--- 4 files changed, 221 insertions(+), 29 deletions(-) create mode 100644 src/compiler/Pointer.hpp diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 6c35408..7b4acb3 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -179,8 +179,10 @@ namespace Theta { ); } + shared_ptr originalDeclaration = dynamic_pointer_cast(assignmentNode->getRight()); + shared_ptr simplifiedDeclaration = simplifyNestedFunctionDeclaration( - dynamic_pointer_cast(assignmentNode->getRight()), + originalDeclaration, module ); @@ -197,23 +199,89 @@ namespace Theta { dynamic_pointer_cast(simplifiedDeclaration) ); - int functionIndex = functionNameToClosureTemplateMap.find(qualifiedFunctionName)->second.getFunctionIndex(); + pair> storage = generateAndStoreClosure( + qualifiedFunctionName, + simplifiedDeclaration, + originalDeclaration, + module + ); simplifiedDeclaration->setMappedBinaryenIndex(idxOfAssignment); // Assign it in scope to the lhs identifier so we can always look it up later when it is referenced scope.insert(assignmentIdentifier, simplifiedDeclaration); + // Returns a reference to the closure memory address return BinaryenLocalSet( module, idxOfAssignment, BinaryenConst( module, - BinaryenLiteralInt32(functionIndex) + BinaryenLiteralInt32(storage.first.getPointer().getAddress()) ) ); } + pair> CodeGen::generateAndStoreClosure( + string qualifiedReferenceFunctionName, + shared_ptr simplifiedReference, + shared_ptr originalReference, + BinaryenModuleRef &module + ) { + Pointer referencePtr = functionNameToClosureTemplateMap.find(qualifiedReferenceFunctionName)->second.getFunctionPointer(); + set originalParameters; + + for (auto param : originalReference->getParameters()->getElements()) { + originalParameters.insert(dynamic_pointer_cast(param)->getIdentifier()); + } + + vector expressions; + vector> argPointers; + + for (auto param : simplifiedReference->getParameters()->getElements()) { + string paramName = dynamic_pointer_cast(param)->getIdentifier(); + + if (originalParameters.find(paramName) == originalParameters.end()) continue; + + shared_ptr paramValue = scope.lookup(paramName); + cout << "about to calculate size" << endl; + cout << paramValue->toJSON() << endl; + + // TODO: this is causing build failure. paramValue isnt a literal in this case + int byteSize = calculateLiteralByteSize(paramValue); + cout << "calculated it" << endl; + expressions.push_back( + BinaryenStore( + module, + byteSize, + 0, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), + generate(paramValue, module), + getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(param->getResolvedType())), + MEMORY_NAME.c_str() + ) + ); + + argPointers.push_back(Pointer(memoryOffset)); + + // TODO: change to only increment memoryOffset after the loop finishes + memoryOffset += byteSize; + } + + WasmClosure closure = WasmClosure( + referencePtr, + simplifiedReference->getParameters()->getElements().size(), + argPointers + ); + + vector storageExpressions = generateClosureMemoryStore(closure, module); + + copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); + + return make_pair(closure, expressions); + } + // Transforms nested function declarations and generates an anonymous function in the function table shared_ptr CodeGen::simplifyNestedFunctionDeclaration( shared_ptr fnDeclNode, @@ -413,7 +481,10 @@ namespace Theta { if (functionNameToClosureTemplateMap.find(functionName) == functionNameToClosureTemplateMap.end()) { functionNameToClosureTemplateMap.insert(make_pair( functionName, - WasmClosure(functionNameToClosureTemplateMap.size(), totalParams) + WasmClosure( + Pointer(functionNameToClosureTemplateMap.size()), + totalParams + ) )); } @@ -480,9 +551,10 @@ namespace Theta { shared_ptr reference, BinaryenModuleRef &module ) { + string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); + if (reference->getNodeType() == ASTNode::FUNCTION_DECLARATION) { shared_ptr ref = dynamic_pointer_cast(reference); - string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); string refIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); cout << "Looking for closure template: " << refIdentifier << endl; @@ -491,7 +563,7 @@ namespace Theta { WasmClosure closure = WasmClosure::clone(closureTemplate); vector expressions; - vector paramMemPointers; + vector> paramMemPointers; // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory // That way, we save a bunch of store and load calls, and can just skip to the call_indirect @@ -499,7 +571,7 @@ namespace Theta { int byteSize = calculateLiteralByteSize(arg); int memLocation = memoryOffset; - paramMemPointers.push_back(memLocation); + paramMemPointers.push_back(Pointer(memLocation)); memoryOffset += byteSize; @@ -520,7 +592,7 @@ namespace Theta { closure.addArgs(paramMemPointers); - auto [closurePointer, storageExpressions] = generateClosureMemoryStore(closure, module); + vector storageExpressions = generateClosureMemoryStore(closure, module); copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); @@ -538,7 +610,7 @@ namespace Theta { 0, 0, getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), // TODO: fix the hardcoded stuff here - BinaryenConst(module, BinaryenLiteralInt32(closure.getArgPointers().at(i))), + BinaryenConst(module, BinaryenLiteralInt32(closure.getArgPointers().at(i).getAddress())), MEMORY_NAME.c_str() ); } @@ -549,7 +621,7 @@ namespace Theta { BinaryenCallIndirect( module, FN_TABLE_NAME.c_str(), - BinaryenConst(module, BinaryenLiteralInt32(closure.getFunctionIndex())), + BinaryenConst(module, BinaryenLiteralInt32(closure.getFunctionPointer().getAddress())), operands, closure.getArgPointers().size(), fnTypes.first, @@ -557,7 +629,7 @@ namespace Theta { ) ); } else { - expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(closurePointer))); + expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(closure.getPointer().getAddress()))); } BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; @@ -573,7 +645,61 @@ namespace Theta { shared_ptr ref = dynamic_pointer_cast(reference); string refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); - functionNameToClosureTemplateMap.find(Compiler::getQualifiedFunctionIdentifier(refIdentifier, reference)); + vector expressions; + vector paramMemPointers; + + string qualifiedInvName = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); + + shared_ptr inScope = scope.lookup(qualifiedInvName); + + for (auto arg : funcInvNode->getParameters()->getElements()) { + int byteSize = calculateLiteralByteSize(arg); + int memLocation = memoryOffset; + + paramMemPointers.push_back(memLocation); + + // TODO: instead of incrementing memoryoffset each time, lets change to only increment after the loop, and then + // for each generated store operation just use the offset field instead of 0 + memoryOffset += byteSize; + + expressions.push_back( + BinaryenStore( + module, + byteSize, + 0, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memLocation)), + generate(arg, module), + getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), + MEMORY_NAME.c_str() + ) + ); + } + + cout << "meow" << endl; + cout << inScope->toJSON() << endl; + + BinaryenExpressionPrint( + BinaryenLoad( + module, + 4, + false, + 0, + 0, + BinaryenTypeInt32(), + BinaryenLocalGet( // TODO: need to check if is -1 and globalget instead + module, + inScope->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + MEMORY_NAME.c_str() + ) + ); + + //functionNameToClosureTemplateMap + + + cout << "teehee" << endl; } BinaryenExpressionRef CodeGen::generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module) { @@ -762,15 +888,15 @@ namespace Theta { } } - pair> CodeGen::generateClosureMemoryStore(WasmClosure closure, BinaryenModuleRef &module) { + vector CodeGen::generateClosureMemoryStore(WasmClosure &closure, BinaryenModuleRef &module) { // At least 4 bytes for the fn_idx and 4 bytes for the arity. Then 4 bytes for each parameter the closure takes. // We also multiply the remaining arity, since not all parameters may have been applied to the function int totalMemSize = 8 + (closure.getArgPointers().size() * 4) + (closure.getArity() * 4); int memLocation = memoryOffset; - vector closureDataSegments = { closure.getFunctionIndex(), closure.getArity() }; + vector closureDataSegments = { closure.getFunctionPointer().getAddress(), closure.getArity() }; for (int i = 0; i < closure.getArgPointers().size(); i++) { - closureDataSegments.push_back(closure.getArgPointers().at(i)); + closureDataSegments.push_back(closure.getArgPointers().at(i).getAddress()); } vector expressions; @@ -790,7 +916,9 @@ namespace Theta { ); } - return make_pair(memLocation, expressions); + closure.setAddress(memLocation); + + return expressions; } BinaryenOp CodeGen::getBinaryenOpFromBinOpNode(shared_ptr binOpNode) { @@ -873,7 +1001,7 @@ namespace Theta { const char** fnNames = new const char*[functionNameToClosureTemplateMap.size()]; for (auto& [fnName, fnRef] : functionNameToClosureTemplateMap) { - fnNames[fnRef.getFunctionIndex()] = fnName.c_str(); + fnNames[fnRef.getFunctionPointer().getAddress()] = fnName.c_str(); } BinaryenAddActiveElementSegment( diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 2087414..258bd48 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -85,7 +85,14 @@ namespace Theta { void bindIdentifierToScope(shared_ptr ast); void registerModuleFunctions(BinaryenModuleRef &module); - pair> generateClosureMemoryStore(WasmClosure closure, BinaryenModuleRef &module); + pair> generateAndStoreClosure( + string qualifiedReferenceFunctionName, + shared_ptr simplifiedReference, + shared_ptr originalReference, + BinaryenModuleRef &module + ); + + vector generateClosureMemoryStore(WasmClosure &closure, BinaryenModuleRef &module); void collectClosureScope( shared_ptr node, diff --git a/src/compiler/Pointer.hpp b/src/compiler/Pointer.hpp new file mode 100644 index 0000000..1dfcc2c --- /dev/null +++ b/src/compiler/Pointer.hpp @@ -0,0 +1,23 @@ + + +namespace Theta { + enum PointerType { + Function, + Closure, + Data + }; + + template + class Pointer { + public: + Pointer() : address(-1) {} + Pointer(int addr) : address(addr) {} + + PointerType getType() { return type; } + + int getAddress() { return address; } + + private: + int address; + }; +} diff --git a/src/compiler/WasmClosure.hpp b/src/compiler/WasmClosure.hpp index 4d90344..03c59a0 100644 --- a/src/compiler/WasmClosure.hpp +++ b/src/compiler/WasmClosure.hpp @@ -1,43 +1,77 @@ #pragma once +#include #include +#include +#include "Pointer.hpp" using namespace std; namespace Theta { class WasmClosure { public: - WasmClosure(int tableIndex, int initialArity) { - idx = tableIndex; - arity = initialArity; + WasmClosure(Pointer ptr, int initialArity) : fnPointer(ptr), arity(initialArity) { + argPointers.resize(arity); + } + WasmClosure( + Pointer ptr, + int initialArity, + vector> args + ) : fnPointer(ptr), arity(initialArity), argPointers(args) { argPointers.resize(arity); } - int getFunctionIndex() { return idx; } + void setAddress(int closureMemAddress) { + pointer = Pointer(closureMemAddress); + } + + Pointer getPointer() { return pointer; } + + Pointer getFunctionPointer() { return fnPointer; } int getArity() { return arity; } - vector getArgPointers() { return argPointers; } + vector> getArgPointers() { return argPointers; } - void addArgs(vector argPtrs) { - for (int argPtr : argPtrs) { + void addArgs(vector> argPtrs) { + for (auto argPtr : argPtrs) { argPointers[arity - 1] = argPtr; arity--; } } + string toJSON() { + ostringstream oss; + + oss << "{"; + oss << "\"ptr\": \"" << to_string(fnPointer.getAddress()) << "\""; + oss << ", \"arity\": " << to_string(arity); + oss << ", \"argPointers\": ["; + + for (int i = 0; i < argPointers.size(); i++) { + if (i > 0) oss << ", "; + + oss << to_string(argPointers[i].getAddress()); + } + + oss << "] "; + oss << "}"; + + return oss.str(); + } + static WasmClosure clone(WasmClosure toClone) { return WasmClosure( - toClone.getFunctionIndex(), + toClone.getFunctionPointer(), toClone.arity ); } - private: - int idx; + Pointer pointer; + Pointer fnPointer; int arity; - vector argPointers; + vector> argPointers; }; } From b4d1fb770756a4f8c95e2095082249856bc98557 Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 14 Aug 2024 18:59:41 -0400 Subject: [PATCH 17/38] correctly generate closure storage and pointer return --- src/compiler/CodeGen.cpp | 80 ++++++++++++++++++++++++--------------- src/compiler/CodeGen.hpp | 2 +- src/compiler/Compiler.cpp | 1 - 3 files changed, 50 insertions(+), 33 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 7b4acb3..820246a 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -211,15 +211,32 @@ namespace Theta { // Assign it in scope to the lhs identifier so we can always look it up later when it is referenced scope.insert(assignmentIdentifier, simplifiedDeclaration); + vector expressions = storage.second; + // Returns a reference to the closure memory address - return BinaryenLocalSet( - module, - idxOfAssignment, - BinaryenConst( + expressions.push_back( + BinaryenLocalSet( module, - BinaryenLiteralInt32(storage.first.getPointer().getAddress()) + idxOfAssignment, + BinaryenConst( + module, + BinaryenLiteralInt32(storage.first.getPointer().getAddress()) + ) ) ); + + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; + for (int i = 0; i < expressions.size(); i++) { + blockExpressions[i] = expressions.at(i); + } + + return BinaryenBlock( + module, + NULL, + blockExpressions, + expressions.size(), + BinaryenTypeInt32() + ); } pair> CodeGen::generateAndStoreClosure( @@ -241,15 +258,12 @@ namespace Theta { for (auto param : simplifiedReference->getParameters()->getElements()) { string paramName = dynamic_pointer_cast(param)->getIdentifier(); - if (originalParameters.find(paramName) == originalParameters.end()) continue; + if (originalParameters.find(paramName) != originalParameters.end()) continue; shared_ptr paramValue = scope.lookup(paramName); - cout << "about to calculate size" << endl; - cout << paramValue->toJSON() << endl; + shared_ptr paramType = dynamic_pointer_cast(param->getValue()); - // TODO: this is causing build failure. paramValue isnt a literal in this case - int byteSize = calculateLiteralByteSize(paramValue); - cout << "calculated it" << endl; + int byteSize = getByteSizeForType(paramType); expressions.push_back( BinaryenStore( module, @@ -258,7 +272,7 @@ namespace Theta { 0, BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), generate(paramValue, module), - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(param->getResolvedType())), + getBinaryenTypeFromTypeDeclaration(paramType), MEMORY_NAME.c_str() ) ); @@ -474,9 +488,6 @@ namespace Theta { generate(fnDeclNode->getDefinition(), module) ); - cout << "teehee" << endl; - BinaryenModulePrint(module); - // Only add to the closure template map if its not already in there. It may have been added during hoisting if (functionNameToClosureTemplateMap.find(functionName) == functionNameToClosureTemplateMap.end()) { functionNameToClosureTemplateMap.insert(make_pair( @@ -568,7 +579,7 @@ namespace Theta { // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory // That way, we save a bunch of store and load calls, and can just skip to the call_indirect for (auto arg : funcInvNode->getParameters()->getElements()) { - int byteSize = calculateLiteralByteSize(arg); + int byteSize = getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())); int memLocation = memoryOffset; paramMemPointers.push_back(Pointer(memLocation)); @@ -605,7 +616,7 @@ namespace Theta { operands[i] = BinaryenLoad( module, - calculateLiteralByteSize(arg), + getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())), false, // TODO: Support signed values! 0, 0, @@ -653,7 +664,7 @@ namespace Theta { shared_ptr inScope = scope.lookup(qualifiedInvName); for (auto arg : funcInvNode->getParameters()->getElements()) { - int byteSize = calculateLiteralByteSize(arg); + int byteSize = getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())); int memLocation = memoryOffset; paramMemPointers.push_back(memLocation); @@ -746,22 +757,28 @@ namespace Theta { } BinaryenExpressionRef CodeGen::generateIdentifier(shared_ptr identNode, BinaryenModuleRef &module) { - shared_ptr identInScope = scope.lookup(identNode->getIdentifier()); + string identName = identNode->getIdentifier(); + shared_ptr identInScope = scope.lookup(identName); + + // The ident in this case may refer to a parameter to a function, which may not have a resolvedType + shared_ptr type = dynamic_pointer_cast( + identInScope->getResolvedType() + ? identInScope->getResolvedType() + : identInScope->getValue() + ); if (identInScope->getMappedBinaryenIndex() == -1) { - string identName = identNode->getIdentifier(); - return BinaryenGlobalGet( module, identName.c_str(), - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(identInScope->getResolvedType())) + getBinaryenTypeFromTypeDeclaration(type) ); } return BinaryenLocalGet( module, identInScope->getMappedBinaryenIndex(), - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(identNode->getResolvedType())) + getBinaryenTypeFromTypeDeclaration(type) ); } @@ -1014,15 +1031,16 @@ namespace Theta { ); } - int CodeGen::calculateLiteralByteSize(shared_ptr literal) { - if (literal->getNodeType() == ASTNode::BOOLEAN_LITERAL) return 4; - if (literal->getNodeType() == ASTNode::NUMBER_LITERAL) return 8; - if (literal->getNodeType() == ASTNode::STRING_LITERAL) { - cout << "WARNING! String byte size count has not been implemented." << endl; - return 100; - } + int CodeGen::getByteSizeForType(shared_ptr type) { + if (type->getType() == DataTypes::NUMBER) return 8; + if (type->getType() == DataTypes::BOOLEAN) return 4; + // TODO: Figure out if this holds true. According to + // https://github.com/WebAssembly/stringref/blob/main/proposals/stringref/Overview.md#the-stringref-facility + // stringrefs are either i32 or i64 + if (type->getType() == DataTypes::STRING) return 8; - throw new runtime_error("No cant calculate byte size for non-literal"); + cout << "Not implemented for type: " << type->getType() << endl; + throw new runtime_error("Not implemented"); } BinaryenModuleRef CodeGen::importCoreLangWasm() { diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 258bd48..e23f4bf 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -103,7 +103,7 @@ namespace Theta { string generateFunctionHash(shared_ptr function); - int calculateLiteralByteSize(shared_ptr literal); + int getByteSizeForType(shared_ptr type); BinaryenModuleRef importCoreLangWasm(); diff --git a/src/compiler/Compiler.cpp b/src/compiler/Compiler.cpp index f2a3e23..119c3f6 100644 --- a/src/compiler/Compiler.cpp +++ b/src/compiler/Compiler.cpp @@ -37,7 +37,6 @@ namespace Theta { if (isEmitWAT) { cout << "Generated WAT for \"" + entrypoint + "\":" << endl; BinaryenModulePrint(module); - cout << "done" << endl; } writeModuleToFile(module, outputFile); From 68bddf784aa9e311effdaca63e2913e90f333338 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 15 Aug 2024 20:52:28 -0400 Subject: [PATCH 18/38] correct lookups of globally anonymous functions from within local scope --- src/compiler/CodeGen.cpp | 100 ++++++++++++------ src/compiler/CodeGen.hpp | 6 +- src/compiler/SymbolTable.hpp | 14 +-- src/compiler/SymbolTableStack.hpp | 20 ++-- src/compiler/TypeChecker.cpp | 36 ++++--- src/compiler/TypeChecker.hpp | 4 +- .../optimization/LiteralInlinerPass.cpp | 30 +++--- .../optimization/LiteralInlinerPass.hpp | 4 +- .../optimization/OptimizationPass.cpp | 10 +- .../optimization/OptimizationPass.hpp | 8 +- src/parser/ast/ControlFlowNode.hpp | 1 + 11 files changed, 135 insertions(+), 98 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 820246a..676c42b 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -69,7 +69,10 @@ namespace Theta { } BinaryenExpressionRef CodeGen::generate(shared_ptr node, BinaryenModuleRef &module) { - if (node->hasOwnScope()) scope.enterScope(); + if (node->hasOwnScope()) { + scope.enterScope(); + scopeReferences.enterScope(); + } if (node->getNodeType() == ASTNode::SOURCE) { generateSource(dynamic_pointer_cast(node), module); @@ -103,7 +106,10 @@ namespace Theta { return generateBooleanLiteral(dynamic_pointer_cast(node), module); } - if (node->hasOwnScope()) scope.exitScope(); + if (node->hasOwnScope()) { + scope.exitScope(); + scopeReferences.exitScope(); + } return nullptr; } @@ -145,7 +151,7 @@ namespace Theta { // Using a space in scope for an idx counter so we dont have to have a whole separate stack just to keep track of the current // local idx - shared_ptr currentIdentIdx = dynamic_pointer_cast(scope.lookup(LOCAL_IDX_SCOPE_KEY)); + shared_ptr currentIdentIdx = dynamic_pointer_cast(scope.lookup(LOCAL_IDX_SCOPE_KEY).value()); int idxOfAssignment = stoi(currentIdentIdx->getLiteralValue()); currentIdentIdx->setLiteralValue(to_string(idxOfAssignment + 1)); @@ -168,14 +174,10 @@ namespace Theta { scope.insert(identName, assignmentRhs); - BinaryenExpressionRef generated = generate(assignmentRhs, module); - cout << "here after generate: " << assignmentRhs->toJSON() << endl; - BinaryenExpressionPrint(generated); - return BinaryenLocalSet( module, idxOfAssignment, - generated + generate(assignmentRhs, module) ); } @@ -186,21 +188,23 @@ namespace Theta { module ); - string simplifiedDeclarationName = generateFunctionHash(simplifiedDeclaration); + // Generating a unique hash for this function is necessary because it will be stored on the module globally, + // so we need to make sure there are no naming collisions + string simplifiedDeclarationHash = generateFunctionHash(simplifiedDeclaration); generateFunctionDeclaration( - simplifiedDeclarationName, + simplifiedDeclarationHash, simplifiedDeclaration, module ); - string qualifiedFunctionName = Compiler::getQualifiedFunctionIdentifier( - simplifiedDeclarationName, - dynamic_pointer_cast(simplifiedDeclaration) + string globalQualifiedFunctionName = Compiler::getQualifiedFunctionIdentifier( + simplifiedDeclarationHash, + simplifiedDeclaration ); pair> storage = generateAndStoreClosure( - qualifiedFunctionName, + globalQualifiedFunctionName, simplifiedDeclaration, originalDeclaration, module @@ -208,8 +212,15 @@ namespace Theta { simplifiedDeclaration->setMappedBinaryenIndex(idxOfAssignment); - // Assign it in scope to the lhs identifier so we can always look it up later when it is referenced - scope.insert(assignmentIdentifier, simplifiedDeclaration); + string localQualifiedFunctionName = Compiler::getQualifiedFunctionIdentifier( + assignmentIdentifier, + originalDeclaration + ); + + // Assign it in scope to the lhs identifier so we can always look it up later when it is referenced. This + // way the caller does not need to know the global function name in order to call it + scope.insert(globalQualifiedFunctionName, simplifiedDeclaration); + scopeReferences.insert(localQualifiedFunctionName, globalQualifiedFunctionName); vector expressions = storage.second; @@ -260,7 +271,7 @@ namespace Theta { if (originalParameters.find(paramName) != originalParameters.end()) continue; - shared_ptr paramValue = scope.lookup(paramName); + shared_ptr paramValue = scope.lookup(paramName).value(); shared_ptr paramType = dynamic_pointer_cast(param->getValue()); int byteSize = getByteSizeForType(paramType); @@ -318,7 +329,7 @@ namespace Theta { if (paramIdentifiers.find(identifierName) != paramIdentifiers.end()) continue; // If an identifier is globally available we dont need to include it either - shared_ptr inScope = scope.lookup(identifierName); + shared_ptr inScope = scope.lookup(identifierName).value(); if (inScope->getMappedBinaryenIndex() == -1) continue; requiredScopeIdentifiers.insert(identifierName); @@ -441,6 +452,7 @@ namespace Theta { bool addToExports ) { scope.enterScope(); + scopeReferences.enterScope(); BinaryenType parameterType = BinaryenTypeNone(); int totalParams = fnDeclNode->getParameters()->getElements().size(); @@ -504,6 +516,7 @@ namespace Theta { } scope.exitScope(); + scopeReferences.exitScope(); } BinaryenExpressionRef CodeGen::generateBlock(shared_ptr blockNode, BinaryenModuleRef &module) { @@ -538,15 +551,29 @@ namespace Theta { arguments[i] = generate(funcInvNode->getParameters()->getElements().at(i), module); } - shared_ptr foundLocalReference = scope.lookup(funcName); + string scopeLookupIdentifier = funcName; - if (foundLocalReference) { - return generateIndirectInvocation(funcInvNode, foundLocalReference, module); + auto localReference = scopeReferences.lookup(funcName); + if (localReference.has_value()) { + scopeLookupIdentifier = localReference.value(); + } + + auto foundLocalReference = scope.lookup(scopeLookupIdentifier); + + if (foundLocalReference.has_value()) { + return generateIndirectInvocation( + funcInvNode, + foundLocalReference.value(), + module, + scopeLookupIdentifier + ); } // TODO: Check if this needs to be an indirect call, and generate that instead of a normal call. Thats why the current compile // is failing + cout << "AAAAAAAHHH I SHOULD NEVER GET HERE" << endl; + return BinaryenCall( module, funcName.c_str(), @@ -560,16 +587,18 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateIndirectInvocation( shared_ptr funcInvNode, shared_ptr reference, - BinaryenModuleRef &module + BinaryenModuleRef &module, + string refIdentifier ) { string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); if (reference->getNodeType() == ASTNode::FUNCTION_DECLARATION) { shared_ptr ref = dynamic_pointer_cast(reference); - - string refIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); - cout << "Looking for closure template: " << refIdentifier << endl; - + + if (refIdentifier == "") { + refIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); + } + WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; WasmClosure closure = WasmClosure::clone(closureTemplate); @@ -651,17 +680,18 @@ namespace Theta { return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeInt64()); } - // TODO: Implement - cout << "AHA! im here!" << endl; shared_ptr ref = dynamic_pointer_cast(reference); - string refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); + + if (refIdentifier == "") { + refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); + } vector expressions; vector paramMemPointers; string qualifiedInvName = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); - shared_ptr inScope = scope.lookup(qualifiedInvName); + auto inScope = scope.lookup(qualifiedInvName); for (auto arg : funcInvNode->getParameters()->getElements()) { int byteSize = getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())); @@ -687,9 +717,6 @@ namespace Theta { ); } - cout << "meow" << endl; - cout << inScope->toJSON() << endl; - BinaryenExpressionPrint( BinaryenLoad( module, @@ -700,7 +727,7 @@ namespace Theta { BinaryenTypeInt32(), BinaryenLocalGet( // TODO: need to check if is -1 and globalget instead module, - inScope->getMappedBinaryenIndex(), + inScope.value()->getMappedBinaryenIndex(), BinaryenTypeInt32() ), MEMORY_NAME.c_str() @@ -758,7 +785,7 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateIdentifier(shared_ptr identNode, BinaryenModuleRef &module) { string identName = identNode->getIdentifier(); - shared_ptr identInScope = scope.lookup(identName); + shared_ptr identInScope = scope.lookup(identName).value(); // The ident in this case may refer to a parameter to a function, which may not have a resolvedType shared_ptr type = dynamic_pointer_cast( @@ -911,6 +938,8 @@ namespace Theta { int totalMemSize = 8 + (closure.getArgPointers().size() * 4) + (closure.getArity() * 4); int memLocation = memoryOffset; + memoryOffset += totalMemSize; + vector closureDataSegments = { closure.getFunctionPointer().getAddress(), closure.getArity() }; for (int i = 0; i < closure.getArgPointers().size(); i++) { closureDataSegments.push_back(closure.getArgPointers().at(i).getAddress()); @@ -985,6 +1014,7 @@ namespace Theta { void CodeGen::hoistCapsuleElements(vector> elements) { scope.enterScope(); + scopeReferences.enterScope(); for (auto ast : elements) bindIdentifierToScope(ast); } diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index e23f4bf..aeaa4f4 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -42,7 +42,8 @@ namespace Theta { BinaryenExpressionRef generateIndirectInvocation( shared_ptr node, shared_ptr reference, - BinaryenModuleRef &module + BinaryenModuleRef &module, + string refIdentifier = "" ); BinaryenExpressionRef generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module); BinaryenExpressionRef generateIdentifier(shared_ptr node, BinaryenModuleRef &module); @@ -60,7 +61,8 @@ namespace Theta { ); private: - SymbolTableStack scope; + SymbolTableStack> scope; + SymbolTableStack scopeReferences; string FN_TABLE_NAME = "0"; string MEMORY_NAME = "0"; int memoryOffset = 0; diff --git a/src/compiler/SymbolTable.hpp b/src/compiler/SymbolTable.hpp index 7333c76..c8dfa1b 100644 --- a/src/compiler/SymbolTable.hpp +++ b/src/compiler/SymbolTable.hpp @@ -1,26 +1,26 @@ #pragma once #include -#include "../parser/ast/TypeDeclarationNode.hpp" -#include "parser/ast/ASTNode.hpp" +#include using namespace std; namespace Theta { + template class SymbolTable { public: - void insert(const string &name, shared_ptr type) { - table[name] = type; + void insert(const string &name, T value) { + table[name] = value; } - shared_ptr lookup(const string &name) { + optional lookup(const string &name) { auto it = table.find(name); if (it != table.end()) return it->second; - return nullptr; + return nullopt; } private: - map> table; + map table; }; } diff --git a/src/compiler/SymbolTableStack.hpp b/src/compiler/SymbolTableStack.hpp index b30f074..e666332 100644 --- a/src/compiler/SymbolTableStack.hpp +++ b/src/compiler/SymbolTableStack.hpp @@ -1,41 +1,41 @@ #pragma once -#include "parser/ast/ASTNode.hpp" #include "SymbolTable.hpp" #include using namespace std; namespace Theta { + template class SymbolTableStack { public: void enterScope() { - scopes.push(make_shared()); + scopes.push(make_shared>()); } void exitScope() { if (!scopes.empty()) scopes.pop(); } - void insert(const string &name, shared_ptr type) { - if (!scopes.empty()) scopes.top()->insert(name, type); + void insert(const string &name, T value) { + if (!scopes.empty()) scopes.top()->insert(name, value); } - shared_ptr lookup(const string &name) { - stack> tmpScopes = scopes; + optional lookup(const string &name) { + stack>> tmpScopes = scopes; while(!tmpScopes.empty()) { - auto type = tmpScopes.top()->lookup(name); + auto result = tmpScopes.top()->lookup(name); - if (type) return type; + if (result.has_value()) return result.value(); tmpScopes.pop(); } - return nullptr; + return nullopt; } private: - stack> scopes; + stack>> scopes; }; } diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index 88fc5a5..bbd0529 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -170,18 +170,18 @@ namespace Theta { ) ); - shared_ptr existingFuncIdentifierInScope = identifierTable.lookup(uniqueFuncIdentifier); + auto existingFuncIdentifierInScope = identifierTable.lookup(uniqueFuncIdentifier); - if (existingFuncIdentifierInScope) { + if (existingFuncIdentifierInScope.has_value()) { Compiler::getInstance().addException(make_shared(ident->getIdentifier())); return false; } identifierTable.insert(uniqueFuncIdentifier, node->getRight()); } else { - shared_ptr existingIdentifierInScope = identifierTable.lookup(ident->getIdentifier()); + auto existingIdentifierInScope = identifierTable.lookup(ident->getIdentifier()); - if (existingIdentifierInScope) { + if (existingIdentifierInScope.has_value()) { Compiler::getInstance().addException(make_shared(ident->getIdentifier())); return false; } @@ -553,9 +553,9 @@ namespace Theta { structNode->setResolvedType(make_shared(node->getName(), structNode)); - shared_ptr existingIdentifierInScope = identifierTable.lookup(node->getName()); + auto existingIdentifierInScope = identifierTable.lookup(node->getName()); - if (existingIdentifierInScope) { + if (existingIdentifierInScope.has_value()) { Compiler::getInstance().addException(make_shared(node->getName())); return false; } @@ -665,9 +665,9 @@ namespace Theta { string uniqueFuncIdentifier = Compiler::getQualifiedFunctionIdentifier(ident->getIdentifier(), node->getRight()); - shared_ptr existingFuncIdentifierInScope = capsuleDeclarationsTable.lookup(uniqueFuncIdentifier); + auto existingFuncIdentifierInScope = capsuleDeclarationsTable.lookup(uniqueFuncIdentifier); - if (existingFuncIdentifierInScope) { + if (existingFuncIdentifierInScope.has_value()) { Compiler::getInstance().addException(make_shared(ident->getIdentifier())); return; } @@ -683,9 +683,9 @@ namespace Theta { void TypeChecker::hoistStructDefinition(shared_ptr node) { shared_ptr structNode = dynamic_pointer_cast(node); - shared_ptr existingStructDefinitionInScope = capsuleDeclarationsTable.lookup(structNode->getName()); + auto existingStructDefinitionInScope = capsuleDeclarationsTable.lookup(structNode->getName()); - if (existingStructDefinitionInScope) { + if (existingStructDefinitionInScope.has_value()) { Compiler::getInstance().addException(make_shared(structNode->getName())); return; } @@ -698,9 +698,9 @@ namespace Theta { void TypeChecker::hoistIdentifier(shared_ptr node) { shared_ptr identNode = dynamic_pointer_cast(node->getLeft()); - shared_ptr existingHoistedIdentifier = capsuleDeclarationsTable.lookup(identNode->getIdentifier()); + auto existingHoistedIdentifier = capsuleDeclarationsTable.lookup(identNode->getIdentifier()); - if (existingHoistedIdentifier) { + if (existingHoistedIdentifier.has_value()) { Compiler::getInstance().addException(make_shared(identNode->getIdentifier())); return; } @@ -851,13 +851,15 @@ namespace Theta { } shared_ptr TypeChecker::lookupInScope(string identifierName) { - shared_ptr foundInCapsule = capsuleDeclarationsTable.lookup(identifierName); - shared_ptr foundInLocalScope = identifierTable.lookup(identifierName); + auto foundInCapsule = capsuleDeclarationsTable.lookup(identifierName); + auto foundInLocalScope = identifierTable.lookup(identifierName); // Local scope overrides capsule scope - if (foundInLocalScope) return foundInLocalScope; - - return foundInCapsule; + if (foundInLocalScope.has_value()) return foundInLocalScope.value(); + + if (foundInCapsule.has_value()) return foundInCapsule.value(); + + return nullptr; } shared_ptr TypeChecker::getFunctionReturnType(shared_ptr fnDeclNode) { diff --git a/src/compiler/TypeChecker.hpp b/src/compiler/TypeChecker.hpp index 9b3972a..0585ea0 100644 --- a/src/compiler/TypeChecker.hpp +++ b/src/compiler/TypeChecker.hpp @@ -65,8 +65,8 @@ namespace Theta { private: - SymbolTableStack identifierTable; - SymbolTableStack capsuleDeclarationsTable; + SymbolTableStack> identifierTable; + SymbolTableStack> capsuleDeclarationsTable; /** * @brief Performs type checking on a single AST node. diff --git a/src/compiler/optimization/LiteralInlinerPass.cpp b/src/compiler/optimization/LiteralInlinerPass.cpp index ece749f..4e2cb00 100644 --- a/src/compiler/optimization/LiteralInlinerPass.cpp +++ b/src/compiler/optimization/LiteralInlinerPass.cpp @@ -39,48 +39,48 @@ void LiteralInlinerPass::substituteIdentifiers(shared_ptr &ast) { if (ast->getValue() && ast->getValue()->getNodeType() == ASTNode::TYPE_DECLARATION) return; shared_ptr ident = dynamic_pointer_cast(ast); - shared_ptr foundIdentifier = hoistedScope.lookup(ident->getIdentifier()); + auto foundIdentifier = hoistedScope.lookup(ident->getIdentifier()); - shared_ptr foundInScope = localScope.lookup(ident->getIdentifier()); - if (foundInScope) { + auto foundInScope = localScope.lookup(ident->getIdentifier()); + if (foundInScope.has_value()) { foundIdentifier = foundInScope; } // Only optimize if we found the literal value we need to replace with if ( - !foundIdentifier || + !foundIdentifier.has_value() || !( - foundIdentifier->getNodeType() == ASTNode::NUMBER_LITERAL || - foundIdentifier->getNodeType() == ASTNode::STRING_LITERAL || - foundIdentifier->getNodeType() == ASTNode::BOOLEAN_LITERAL + foundIdentifier.value()->getNodeType() == ASTNode::NUMBER_LITERAL || + foundIdentifier.value()->getNodeType() == ASTNode::STRING_LITERAL || + foundIdentifier.value()->getNodeType() == ASTNode::BOOLEAN_LITERAL ) ) return; - shared_ptr literal = dynamic_pointer_cast(foundIdentifier); + shared_ptr literal = dynamic_pointer_cast(foundIdentifier.value()); ast = make_shared(literal->getNodeType(), literal->getLiteralValue(), ast); } // When we have a variable assigned to a literal, we can safely just add that to the scope // since we know it references a primitive value -void LiteralInlinerPass::bindIdentifierToScope(shared_ptr &ast, SymbolTableStack &scope) { +void LiteralInlinerPass::bindIdentifierToScope(shared_ptr &ast, SymbolTableStack> &scope) { string identifier = dynamic_pointer_cast(ast->getLeft())->getIdentifier(); if (ast->getRight()->getNodeType() == ASTNode::FUNCTION_DECLARATION) { string uniqueFuncIdentifier = Compiler::getQualifiedFunctionIdentifier(identifier, ast->getRight()); - shared_ptr existingFuncIdentifierInScope = scope.lookup(uniqueFuncIdentifier); + auto existingFuncIdentifierInScope = scope.lookup(uniqueFuncIdentifier); - if (existingFuncIdentifierInScope) { + if (existingFuncIdentifierInScope.has_value()) { Compiler::getInstance().addException(make_shared(identifier)); return; } scope.insert(uniqueFuncIdentifier, ast->getRight()); } else { - shared_ptr foundIdentInScope = scope.lookup(identifier); + auto foundIdentInScope = scope.lookup(identifier); - if (foundIdentInScope) { + if (foundIdentInScope.has_value()) { Compiler::getInstance().addException(make_shared(identifier)); return; } @@ -119,7 +119,7 @@ void LiteralInlinerPass::hoistNecessary(shared_ptr &ast) { nodeList->setElements(topLevelElements); } -void LiteralInlinerPass::unpackEnumElementsInScope(shared_ptr node, SymbolTableStack &scope) { +void LiteralInlinerPass::unpackEnumElementsInScope(shared_ptr node, SymbolTableStack> &scope) { shared_ptr enumNode = dynamic_pointer_cast(node); string baseIdentifier = dynamic_pointer_cast(enumNode->getIdentifier())->getIdentifier(); vector> enumElements = dynamic_pointer_cast(node)->getElements(); @@ -129,7 +129,7 @@ void LiteralInlinerPass::unpackEnumElementsInScope(shared_ptr node, Sym string enumElIdentifier = baseIdentifier + "." + elSymbol->getSymbol().substr(1); - shared_ptr foundScopeIdentifier = scope.lookup(enumElIdentifier); + auto foundScopeIdentifier = scope.lookup(enumElIdentifier); if (foundScopeIdentifier) { Compiler::getInstance().addException(make_shared(enumElIdentifier)); return; diff --git a/src/compiler/optimization/LiteralInlinerPass.hpp b/src/compiler/optimization/LiteralInlinerPass.hpp index eea9389..e102a68 100644 --- a/src/compiler/optimization/LiteralInlinerPass.hpp +++ b/src/compiler/optimization/LiteralInlinerPass.hpp @@ -50,7 +50,7 @@ namespace Theta { * @param ast Reference to the shared pointer of the AST node representing an assignment. * @param scope Reference to the symbol table stack where the identifier will be bound. */ - void bindIdentifierToScope(shared_ptr &ast, SymbolTableStack &scope); + void bindIdentifierToScope(shared_ptr &ast, SymbolTableStack> &scope); /** * @brief Unpacks enum elements and adds them to the given scope. @@ -61,7 +61,7 @@ namespace Theta { * @param ast Reference to the shared pointer of the AST node representing an enum. * @param scope Reference to the symbol table stack where the enum elements will be inserted. */ - void unpackEnumElementsInScope(shared_ptr ast, SymbolTableStack &scope); + void unpackEnumElementsInScope(shared_ptr ast, SymbolTableStack> &scope); /** * @brief Remaps type references for enums by looking up the actual types from the scope. It basically diff --git a/src/compiler/optimization/OptimizationPass.cpp b/src/compiler/optimization/OptimizationPass.cpp index 0afc417..943b3c7 100644 --- a/src/compiler/optimization/OptimizationPass.cpp +++ b/src/compiler/optimization/OptimizationPass.cpp @@ -85,11 +85,13 @@ void OptimizationPass::optimize(shared_ptr &ast, bool isCapsuleDirectCh } shared_ptr OptimizationPass::lookupInScope(string identifierName) { - shared_ptr foindHoisted = hoistedScope.lookup(identifierName); - shared_ptr foundInLocalScope = localScope.lookup(identifierName); + auto foundHoisted = hoistedScope.lookup(identifierName); + auto foundInLocalScope = localScope.lookup(identifierName); // Local scope overrides capsule scope - if (foundInLocalScope) return foundInLocalScope; + if (foundInLocalScope.has_value()) return foundInLocalScope.value(); - return foindHoisted; + if (foundHoisted.has_value()) return foundHoisted.value(); + + return nullptr; } diff --git a/src/compiler/optimization/OptimizationPass.hpp b/src/compiler/optimization/OptimizationPass.hpp index 129c2a8..02f51eb 100644 --- a/src/compiler/optimization/OptimizationPass.hpp +++ b/src/compiler/optimization/OptimizationPass.hpp @@ -28,13 +28,13 @@ namespace Theta { * @brief Cleans up and resets scope variables for the pass. Should always be called after the optimization pass finishes */ void cleanup() { - localScope = SymbolTableStack(); - hoistedScope = SymbolTableStack(); + localScope = SymbolTableStack>(); + hoistedScope = SymbolTableStack>(); } protected: - SymbolTableStack localScope; - SymbolTableStack hoistedScope; + SymbolTableStack> localScope; + SymbolTableStack> hoistedScope; /** * @brief Retrieves an AST node based on an identifier from the available scopes. diff --git a/src/parser/ast/ControlFlowNode.hpp b/src/parser/ast/ControlFlowNode.hpp index fb36abb..cc97ad6 100644 --- a/src/parser/ast/ControlFlowNode.hpp +++ b/src/parser/ast/ControlFlowNode.hpp @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include "ASTNode.hpp" From b998175fedd89a1e00ad63b1fd8f8673a708ef6a Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 16 Aug 2024 20:15:09 -0400 Subject: [PATCH 19/38] fix order in which closure arg pointers are stored in memory, also make sure uninitialized pointers dont get stored --- src/compiler/CodeGen.cpp | 5 +++++ src/compiler/WasmClosure.hpp | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 676c42b..e6bfddb 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -266,6 +266,7 @@ namespace Theta { vector expressions; vector> argPointers; + // Store the args into memory for (auto param : simplifiedReference->getParameters()->getElements()) { string paramName = dynamic_pointer_cast(param)->getIdentifier(); @@ -300,6 +301,7 @@ namespace Theta { argPointers ); + // Store a closure pointing to the args that were stored in memory vector storageExpressions = generateClosureMemoryStore(closure, module); copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); @@ -948,6 +950,9 @@ namespace Theta { vector expressions; for (int i = 0; i < closureDataSegments.size(); i++) { + // Don't store uninitialized pointers + if (closureDataSegments.at(i) == -1) continue; + expressions.push_back( BinaryenStore( module, diff --git a/src/compiler/WasmClosure.hpp b/src/compiler/WasmClosure.hpp index 03c59a0..60cb21a 100644 --- a/src/compiler/WasmClosure.hpp +++ b/src/compiler/WasmClosure.hpp @@ -18,8 +18,16 @@ namespace Theta { Pointer ptr, int initialArity, vector> args - ) : fnPointer(ptr), arity(initialArity), argPointers(args) { + ) : fnPointer(ptr), arity(initialArity) { argPointers.resize(arity); + + for (int i = 0; i < args.size(); i++) { + if (args.at(i).getAddress() != -1) { + argPointers[arity - 1] = args.at(i); + + arity--; + } + } } void setAddress(int closureMemAddress) { From 113d3d153badf384d53e4d4dbb4bef4abc127f92 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 18 Aug 2024 19:49:17 -0400 Subject: [PATCH 20/38] tiny bit of work --- src/compiler/CodeGen.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index e6bfddb..715cb2f 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -599,6 +599,10 @@ namespace Theta { if (refIdentifier == "") { refIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); + } else { + cout << "got passed a refIdentifier: " << refIdentifier << endl; + // TODO: if a ref identifier was passed in, we know this is + // a function that has an existing closure already stored in memory } WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; From 7529de20e45f6e233cf38ef28eaa45561b9f24a0 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 19 Aug 2024 20:43:06 -0400 Subject: [PATCH 21/38] start call_indirect once we reach 0 arity in closure --- src/compiler/CodeGen.cpp | 121 ++++++++++++++++++++++++++++++++++--- src/wasm/ThetaLangCore.wat | 27 +++++++++ 2 files changed, 140 insertions(+), 8 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 715cb2f..a192cff 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -9,6 +9,7 @@ #include #include #include +#include "asmjs/shared-constants.h" #include "binaryen-c.h" #include "compiler/Compiler.hpp" #include "compiler/TypeChecker.hpp" @@ -590,25 +591,128 @@ namespace Theta { shared_ptr funcInvNode, shared_ptr reference, BinaryenModuleRef &module, - string refIdentifier + string passedRefIdentifier ) { string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); if (reference->getNodeType() == ASTNode::FUNCTION_DECLARATION) { shared_ptr ref = dynamic_pointer_cast(reference); + string refIdentifier = passedRefIdentifier; if (refIdentifier == "") { refIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); - } else { - cout << "got passed a refIdentifier: " << refIdentifier << endl; - // TODO: if a ref identifier was passed in, we know this is - // a function that has an existing closure already stored in memory } - + WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; + + vector expressions; + + string funcInvName = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); + + // If the calculated name isn't the same as the refIdentifier, we know + // this is a reference to a function and must have a closure already + // in memory + if (funcInvName != refIdentifier) { + cout << funcInvName << " is not the same as " << refIdentifier << endl; + + for (shared_ptr arg : funcInvNode->getParameters()->getElements()) { + shared_ptr argType = dynamic_pointer_cast(arg->getResolvedType()); + + int argByteSize = getByteSizeForType(argType); + + expressions.push_back( + BinaryenStore( + module, + argByteSize, + 0, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), + generate(arg, module), + getBinaryenTypeFromTypeDeclaration(argType), + MEMORY_NAME.c_str() + ) + ); + + expressions.push_back( + BinaryenCall( + module, + "Theta.Function.populateClosure", + (BinaryenExpressionRef[]){ + BinaryenLocalGet( + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + BinaryenConst( + module, + BinaryenLiteralInt32(memoryOffset) + ) + }, + 2, + BinaryenTypeNone() + ) + ); + + memoryOffset += argByteSize; + } + + // If arity hits 0, we can call_indirect + expressions.push_back( + BinaryenIf( + module, + BinaryenUnary( + module, + BinaryenEqZInt32(), + BinaryenBinary( + module, + BinaryenAddInt32(), + BinaryenLocalGet( + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + BinaryenConst( + module, + BinaryenLiteralInt32(4) + ) + ) + ), + BinaryenCallIndirect( + module, + FN_TABLE_NAME.c_str(), + BinaryenLoad( + module, + 4, + false, + 0, + 0, + BinaryenTypeInt32(), + BinaryenLocalGet( + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + MEMORY_NAME.c_str() + ), + BinaryenExpressionRef *operands, + closureTemplate->getArity(), + BinaryenType params, + BinaryenType results + ), + NULL + ) + ); + + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; + for (int i = 0; i < expressions.size(); i++) { + blockExpressions[i] = expressions.at(i); + } + + return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeNone()); + } + WasmClosure closure = WasmClosure::clone(closureTemplate); - vector expressions; vector> paramMemPointers; // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory @@ -687,7 +791,8 @@ namespace Theta { } shared_ptr ref = dynamic_pointer_cast(reference); - + + string refIdentifier = passedRefIdentifier; if (refIdentifier == "") { refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); } diff --git a/src/wasm/ThetaLangCore.wat b/src/wasm/ThetaLangCore.wat index ac0032a..a580d49 100644 --- a/src/wasm/ThetaLangCore.wat +++ b/src/wasm/ThetaLangCore.wat @@ -1,5 +1,32 @@ (module (memory $0 1 10) + (func $Theta.Function.populateClosure (param $closure_mem_addr i32) (param $param_addr i32) (local $arity i32) + (local.set $arity ;; Load the closure arity + (i32.load + (i32.add + (local.get $closure_mem_addr) + (i32.const 4) + ) + ) + ) + (i32.store offset=4 ;; Update the arity + (local.get $closure_mem_addr) + (i32.sub + (local.get $arity) + (i32.const 1) + ) + ) + (i32.store ;; Store param ptr into memory + (i32.add + (local.get $closure_mem_addr) + (i32.mul + (local.get $arity) + (i32.const 4) + ) + ) + (local.get $param_addr) + ) + ) (func $Theta.Function.executeIndirect (param $fn_idx i32) (result i32) (local $arity i32) (local.set $arity (i32.load From 5fde210e452b365a95f23a2a77c065b98c23106b Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 22 Aug 2024 15:41:31 -0400 Subject: [PATCH 22/38] almost correct indirect calls from closures --- src/compiler/CodeGen.cpp | 55 ++++++++++++++++++++++++++++++++-------- src/compiler/CodeGen.hpp | 1 + 2 files changed, 46 insertions(+), 10 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index a192cff..46a5d66 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -656,28 +656,54 @@ namespace Theta { memoryOffset += argByteSize; } + + BinaryenFunctionRef functionToExecute = BinaryenGetFunction(module, refIdentifier.c_str()); + BinaryenType functionParamType = BinaryenFunctionGetParams(functionToExecute); + int functionArity = BinaryenTypeArity(functionParamType); + + BinaryenType* types; + BinaryenTypeExpand(functionParamType, types); + BinaryenExpressionRef* loadArgsExpressions = new BinaryenExpressionRef[functionArity]; + + for (int i = 0; i < functionArity; i++) { + loadArgsExpressions[i] = BinaryenLoad( + module, + getByteSizeForType(types[i]), + false, // TODO: support negative values + 8 + i * 4, + 0, + types[i], + BinaryenLocalGet( // The local thats storing the pointer to the function we want to call + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + MEMORY_NAME.c_str() + ); + } + // If arity hits 0, we can call_indirect expressions.push_back( BinaryenIf( module, - BinaryenUnary( + BinaryenUnary( // Check if the arity is equal to 0 module, BinaryenEqZInt32(), - BinaryenBinary( + BinaryenBinary( // Add 4 to the closure pointer address to get the arity address module, BinaryenAddInt32(), - BinaryenLocalGet( + BinaryenLocalGet( // The local thats storing the pointer to the function we want to call module, scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), BinaryenTypeInt32() ), - BinaryenConst( + BinaryenConst( module, BinaryenLiteralInt32(4) ) ) ), - BinaryenCallIndirect( + BinaryenCallIndirect( // If the above check is true, execute_indirect module, FN_TABLE_NAME.c_str(), BinaryenLoad( @@ -694,10 +720,10 @@ namespace Theta { ), MEMORY_NAME.c_str() ), - BinaryenExpressionRef *operands, - closureTemplate->getArity(), - BinaryenType params, - BinaryenType results + loadArgsExpressions, + BinaryenTypeArity(functionParamType), + functionParamType, + BinaryenFunctionGetResults(functionToExecute) ), NULL ) @@ -1181,12 +1207,21 @@ namespace Theta { // TODO: Figure out if this holds true. According to // https://github.com/WebAssembly/stringref/blob/main/proposals/stringref/Overview.md#the-stringref-facility // stringrefs are either i32 or i64 - if (type->getType() == DataTypes::STRING) return 8; + if (type->getType() == DataTypes::STRING) return 4; cout << "Not implemented for type: " << type->getType() << endl; throw new runtime_error("Not implemented"); } + int CodeGen::getByteSizeForType(BinaryenType type) { + if (type == BinaryenTypeInt32()) return 4; + if (type == BinaryenTypeInt64()) return 8; + if (type == BinaryenTypeStringref()) return 4; + + cout << "Not implemented for type: " << to_string(type) << endl; + throw new runtime_error("Not implemented"); + } + BinaryenModuleRef CodeGen::importCoreLangWasm() { ifstream file(resolveAbsolutePath("wasm/ThetaLangCore.wat"), ios::binary); if (!file.is_open()) { diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index aeaa4f4..c80cfe8 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -106,6 +106,7 @@ namespace Theta { string generateFunctionHash(shared_ptr function); int getByteSizeForType(shared_ptr type); + int getByteSizeForType(BinaryenType type); BinaryenModuleRef importCoreLangWasm(); From b8023925d2c170461420d426dea97aaa5289f6b0 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 22 Aug 2024 17:51:20 -0400 Subject: [PATCH 23/38] indirect invocations through closures working! --- src/compiler/CodeGen.cpp | 42 +++++++++++++++++++++++--------------- src/wasm/ThetaLangCore.wat | 21 +------------------ 2 files changed, 26 insertions(+), 37 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 46a5d66..83eba3b 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -613,8 +613,6 @@ namespace Theta { // this is a reference to a function and must have a closure already // in memory if (funcInvName != refIdentifier) { - cout << funcInvName << " is not the same as " << refIdentifier << endl; - for (shared_ptr arg : funcInvNode->getParameters()->getElements()) { shared_ptr argType = dynamic_pointer_cast(arg->getResolvedType()); @@ -656,8 +654,8 @@ namespace Theta { memoryOffset += argByteSize; } - BinaryenFunctionRef functionToExecute = BinaryenGetFunction(module, refIdentifier.c_str()); + BinaryenType functionReturnType = BinaryenFunctionGetResults(functionToExecute); BinaryenType functionParamType = BinaryenFunctionGetParams(functionToExecute); int functionArity = BinaryenTypeArity(functionParamType); @@ -670,13 +668,22 @@ namespace Theta { module, getByteSizeForType(types[i]), false, // TODO: support negative values - 8 + i * 4, + 0, 0, types[i], - BinaryenLocalGet( // The local thats storing the pointer to the function we want to call + BinaryenLoad( // Loads the arg pointer module, - scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), - BinaryenTypeInt32() + 4, + false, + 8 + i * 4, + 0, + BinaryenTypeInt32(), + BinaryenLocalGet( // The local thats storing the pointer to the closure + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + MEMORY_NAME.c_str() ), MEMORY_NAME.c_str() ); @@ -689,18 +696,19 @@ namespace Theta { BinaryenUnary( // Check if the arity is equal to 0 module, BinaryenEqZInt32(), - BinaryenBinary( // Add 4 to the closure pointer address to get the arity address + BinaryenLoad( module, - BinaryenAddInt32(), + 4, // Add 4 to the closure pointer address to get the arity address + false, + 4, + 0, + BinaryenTypeInt32(), BinaryenLocalGet( // The local thats storing the pointer to the function we want to call module, scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), BinaryenTypeInt32() ), - BinaryenConst( - module, - BinaryenLiteralInt32(4) - ) + MEMORY_NAME.c_str() ) ), BinaryenCallIndirect( // If the above check is true, execute_indirect @@ -723,9 +731,9 @@ namespace Theta { loadArgsExpressions, BinaryenTypeArity(functionParamType), functionParamType, - BinaryenFunctionGetResults(functionToExecute) - ), - NULL + functionReturnType + ), + BinaryenConst(module, BinaryenLiteralInt64(-1)) ) ); @@ -734,7 +742,7 @@ namespace Theta { blockExpressions[i] = expressions.at(i); } - return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeNone()); + return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), functionReturnType); } WasmClosure closure = WasmClosure::clone(closureTemplate); diff --git a/src/wasm/ThetaLangCore.wat b/src/wasm/ThetaLangCore.wat index a580d49..64ac97d 100644 --- a/src/wasm/ThetaLangCore.wat +++ b/src/wasm/ThetaLangCore.wat @@ -16,7 +16,7 @@ (i32.const 1) ) ) - (i32.store ;; Store param ptr into memory + (i32.store offset=4 ;; Store param ptr into memory (i32.add (local.get $closure_mem_addr) (i32.mul @@ -27,23 +27,4 @@ (local.get $param_addr) ) ) - (func $Theta.Function.executeIndirect (param $fn_idx i32) (result i32) (local $arity i32) - (local.set $arity - (i32.load - (i32.add - (local.get $fn_idx) - (i32.const 4) - ) - ) - ) - (if (result i32) - (i32.eqz (local.get $arity)) - (then - (i32.const 1) - ) - (else - (local.get $fn_idx) - ) - ) - ) ) From 23852b0a5a54c371786c3549ccfe6cdd0b2953a1 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 23 Aug 2024 21:49:06 -0400 Subject: [PATCH 24/38] fix issue finding function references when returning a reference using just an identifier --- src/compiler/CodeGen.cpp | 19 ++++++++++++++++++- src/compiler/TypeChecker.cpp | 6 ++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 83eba3b..77ac0a0 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -223,6 +223,11 @@ namespace Theta { scope.insert(globalQualifiedFunctionName, simplifiedDeclaration); scopeReferences.insert(localQualifiedFunctionName, globalQualifiedFunctionName); + // Also insert the assignment identifier into scope referenecs so that if we want to return a reference to the function + // using the identifier, we can do that. This will overwrite any previous scope references with that identifier, so only + // the most recent identifier of a given name can be returned as a reference + scopeReferences.insert(assignmentIdentifier, globalQualifiedFunctionName); + vector expressions = storage.second; // Returns a reference to the closure memory address @@ -825,12 +830,14 @@ namespace Theta { } shared_ptr ref = dynamic_pointer_cast(reference); - string refIdentifier = passedRefIdentifier; + if (refIdentifier == "") { refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); } + cout << "refidentifier is " << refIdentifier << endl; + vector expressions; vector paramMemPointers; @@ -930,6 +937,14 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateIdentifier(shared_ptr identNode, BinaryenModuleRef &module) { string identName = identNode->getIdentifier(); + cout << "generating " << identName << endl; + optional scopeRef = scopeReferences.lookup(identName); + + if (scopeRef) { + cout << identName << " references " << scopeRef.value() << endl; + identName = scopeRef.value(); + } + shared_ptr identInScope = scope.lookup(identName).value(); // The ident in this case may refer to a parameter to a function, which may not have a resolvedType @@ -939,6 +954,8 @@ namespace Theta { : identInScope->getValue() ); + cout << "resolvedtype of " << identName << " is " << type->toJSON() << endl; + if (identInScope->getMappedBinaryenIndex() == -1) { return BinaryenGlobalGet( module, diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index bbd0529..9af4b76 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -178,6 +178,12 @@ namespace Theta { } identifierTable.insert(uniqueFuncIdentifier, node->getRight()); + + // Also insert as the non-unique identifier, in case the function will be referenced without being called later. + // It's okay to overwrite any identifier table value thats already there for this identifier, because if there are + // multiple function definitions with the same identifier, when the user returns / references one without calling it, + // we'll assume they want the most recent one. + identifierTable.insert(ident->getIdentifier(), node->getResolvedType()); } else { auto existingIdentifierInScope = identifierTable.lookup(ident->getIdentifier()); From 6d0df05ae3d218a53f48f2c7b7f0dfc8cab3e15f Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 24 Aug 2024 22:34:20 -0400 Subject: [PATCH 25/38] refactor invocation code generation flow to simplify everything. just need to finish up indirect calls that reference the result of function invocations --- src/compiler/CodeGen.cpp | 486 ++++++++++++++++----------------------- src/compiler/CodeGen.hpp | 26 ++- 2 files changed, 224 insertions(+), 288 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 77ac0a0..81dcbbc 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -548,348 +548,269 @@ namespace Theta { } BinaryenExpressionRef CodeGen::generateFunctionInvocation(shared_ptr funcInvNode, BinaryenModuleRef &module) { - BinaryenExpressionRef* arguments = new BinaryenExpressionRef[funcInvNode->getParameters()->getElements().size()]; - - string funcName = Compiler::getQualifiedFunctionIdentifier( - dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(), - funcInvNode - ); - - for (int i = 0; i < funcInvNode->getParameters()->getElements().size(); i++) { - arguments[i] = generate(funcInvNode->getParameters()->getElements().at(i), module); - } + string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); - string scopeLookupIdentifier = funcName; + string scopeLookupIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); - auto localReference = scopeReferences.lookup(funcName); - if (localReference.has_value()) { + auto localReference = scopeReferences.lookup(scopeLookupIdentifier); + if (localReference) { scopeLookupIdentifier = localReference.value(); } auto foundLocalReference = scope.lookup(scopeLookupIdentifier); - if (foundLocalReference.has_value()) { - return generateIndirectInvocation( - funcInvNode, - foundLocalReference.value(), - module, - scopeLookupIdentifier - ); + if (!foundLocalReference) { + cout << "Could not find reference for function invocation" << endl; + throw new runtime_error("Reference not found"); } - // TODO: Check if this needs to be an indirect call, and generate that instead of a normal call. Thats why the current compile - // is failing + string funcInvName = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); - cout << "AAAAAAAHHH I SHOULD NEVER GET HERE" << endl; + // If the calculated name isn't the same as the refIdentifier, we know + // this is a reference to a function and must have a closure already + // in memory + if (foundLocalReference.value()->getNodeType() == ASTNode::FUNCTION_INVOCATION || funcInvName != scopeLookupIdentifier) { + return generateCallIndirectForExistingClosure(funcInvNode, scopeLookupIdentifier, module); + } - return BinaryenCall( - module, - funcName.c_str(), - arguments, - funcInvNode->getParameters()->getElements().size(), - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(funcInvNode->getResolvedType())) - ); + cout << "indirect for " << scopeLookupIdentifier << endl; + + return generateCallIndirectForNewClosure(funcInvNode, foundLocalReference.value(), scopeLookupIdentifier, module); } - // TODO: This needs to be refactored - BinaryenExpressionRef CodeGen::generateIndirectInvocation( + vector> CodeGen::generateFunctionInvocationArgMemoryInsertions( shared_ptr funcInvNode, - shared_ptr reference, + vector &expressions, BinaryenModuleRef &module, - string passedRefIdentifier + string refIdentifier ) { - string funcInvIdentifier = dynamic_pointer_cast(funcInvNode->getIdentifier())->getIdentifier(); - - if (reference->getNodeType() == ASTNode::FUNCTION_DECLARATION) { - shared_ptr ref = dynamic_pointer_cast(reference); - - string refIdentifier = passedRefIdentifier; - if (refIdentifier == "") { - refIdentifier = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); - } - - WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; - - vector expressions; - - string funcInvName = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); - - // If the calculated name isn't the same as the refIdentifier, we know - // this is a reference to a function and must have a closure already - // in memory - if (funcInvName != refIdentifier) { - for (shared_ptr arg : funcInvNode->getParameters()->getElements()) { - shared_ptr argType = dynamic_pointer_cast(arg->getResolvedType()); - - int argByteSize = getByteSizeForType(argType); + vector> paramMemPointers; - expressions.push_back( - BinaryenStore( - module, - argByteSize, - 0, - 0, - BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), - generate(arg, module), - getBinaryenTypeFromTypeDeclaration(argType), - MEMORY_NAME.c_str() - ) - ); - - expressions.push_back( - BinaryenCall( - module, - "Theta.Function.populateClosure", - (BinaryenExpressionRef[]){ - BinaryenLocalGet( - module, - scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), - BinaryenTypeInt32() - ), - BinaryenConst( - module, - BinaryenLiteralInt32(memoryOffset) - ) - }, - 2, - BinaryenTypeNone() - ) - ); - - memoryOffset += argByteSize; - } - - BinaryenFunctionRef functionToExecute = BinaryenGetFunction(module, refIdentifier.c_str()); - BinaryenType functionReturnType = BinaryenFunctionGetResults(functionToExecute); - BinaryenType functionParamType = BinaryenFunctionGetParams(functionToExecute); - int functionArity = BinaryenTypeArity(functionParamType); + // Store each passed argument into memory + for (shared_ptr arg : funcInvNode->getParameters()->getElements()) { + shared_ptr argType = dynamic_pointer_cast(arg->getResolvedType()); + + int argByteSize = getByteSizeForType(argType); - BinaryenType* types; - BinaryenTypeExpand(functionParamType, types); - BinaryenExpressionRef* loadArgsExpressions = new BinaryenExpressionRef[functionArity]; + expressions.push_back( + BinaryenStore( + module, + argByteSize, + 0, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), + generate(arg, module), + getBinaryenTypeFromTypeDeclaration(argType), + MEMORY_NAME.c_str() + ) + ); - for (int i = 0; i < functionArity; i++) { - loadArgsExpressions[i] = BinaryenLoad( + // If a refIdentifier was passed, that means we have an existing closure + // in memory that we want to populate. + if (refIdentifier != "") { + expressions.push_back( + BinaryenCall( module, - getByteSizeForType(types[i]), - false, // TODO: support negative values - 0, - 0, - types[i], - BinaryenLoad( // Loads the arg pointer - module, - 4, - false, - 8 + i * 4, - 0, - BinaryenTypeInt32(), - BinaryenLocalGet( // The local thats storing the pointer to the closure + "Theta.Function.populateClosure", + (BinaryenExpressionRef[]){ + BinaryenLocalGet( module, scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), BinaryenTypeInt32() ), - MEMORY_NAME.c_str() - ), - MEMORY_NAME.c_str() - ); - } - - // If arity hits 0, we can call_indirect - expressions.push_back( - BinaryenIf( - module, - BinaryenUnary( // Check if the arity is equal to 0 - module, - BinaryenEqZInt32(), - BinaryenLoad( + BinaryenConst( module, - 4, // Add 4 to the closure pointer address to get the arity address - false, - 4, - 0, - BinaryenTypeInt32(), - BinaryenLocalGet( // The local thats storing the pointer to the function we want to call - module, - scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), - BinaryenTypeInt32() - ), - MEMORY_NAME.c_str() + BinaryenLiteralInt32(memoryOffset) ) - ), - BinaryenCallIndirect( // If the above check is true, execute_indirect - module, - FN_TABLE_NAME.c_str(), - BinaryenLoad( - module, - 4, - false, - 0, - 0, - BinaryenTypeInt32(), - BinaryenLocalGet( - module, - scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), - BinaryenTypeInt32() - ), - MEMORY_NAME.c_str() - ), - loadArgsExpressions, - BinaryenTypeArity(functionParamType), - functionParamType, - functionReturnType - ), - BinaryenConst(module, BinaryenLiteralInt64(-1)) + }, + 2, + BinaryenTypeNone() ) ); - - BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; - for (int i = 0; i < expressions.size(); i++) { - blockExpressions[i] = expressions.at(i); - } - - return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), functionReturnType); } - WasmClosure closure = WasmClosure::clone(closureTemplate); + paramMemPointers.push_back(Pointer(memoryOffset)); - vector> paramMemPointers; + memoryOffset += argByteSize; + } - // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory - // That way, we save a bunch of store and load calls, and can just skip to the call_indirect - for (auto arg : funcInvNode->getParameters()->getElements()) { - int byteSize = getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())); - int memLocation = memoryOffset; + return paramMemPointers; + } - paramMemPointers.push_back(Pointer(memLocation)); + BinaryenExpressionRef CodeGen::generateCallIndirectForExistingClosure( + shared_ptr funcInvNode, + string refIdentifier, + BinaryenModuleRef &module + ) { + vector expressions; - memoryOffset += byteSize; + generateFunctionInvocationArgMemoryInsertions(funcInvNode, expressions, module, refIdentifier); - // Store each passed argument into memory - expressions.push_back( - BinaryenStore( - module, - byteSize, - 0, - 0, - BinaryenConst(module, BinaryenLiteralInt32(memLocation)), - generate(arg, module), - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), - MEMORY_NAME.c_str() - ) - ); - } + cout << "in here for " << refIdentifier << endl; - closure.addArgs(paramMemPointers); + // TODO: The referenced function isn't necessarily always already generated by Binaryen. If we haven't generated + // the reference before calling it here, this will bomb out. Thats why the current build is failing. The below lines + // need to be changed to figure out the arity, types, etc strictly from the funcInvNode data (and potentially a reference node, if needed) - vector storageExpressions = generateClosureMemoryStore(closure, module); + BinaryenFunctionRef functionToExecute = BinaryenGetFunction(module, refIdentifier.c_str()); + BinaryenType functionReturnType = BinaryenFunctionGetResults(functionToExecute); + BinaryenType functionParamType = BinaryenFunctionGetParams(functionToExecute); + int functionArity = BinaryenTypeArity(functionParamType); - copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); + BinaryenType* types; + BinaryenTypeExpand(functionParamType, types); + BinaryenExpressionRef* loadArgsExpressions = new BinaryenExpressionRef[functionArity]; - // If we're at 0 arity we can go ahead and execute the function call - if (closure.getArity() == 0) { - BinaryenExpressionRef* operands = new BinaryenExpressionRef[closure.getArgPointers().size()]; - - for (int i = 0; i < closure.getArgPointers().size(); i++) { - shared_ptr arg = funcInvNode->getParameters()->getElements().at(i); + for (int i = 0; i < functionArity; i++) { + loadArgsExpressions[i] = BinaryenLoad( + module, + getByteSizeForType(types[i]), + false, // TODO: support negative values + 0, + 0, + types[i], + BinaryenLoad( // Loads the arg pointer + module, + 4, + false, + 8 + i * 4, + 0, + BinaryenTypeInt32(), + BinaryenLocalGet( // The local thats storing the pointer to the closure + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + MEMORY_NAME.c_str() + ), + MEMORY_NAME.c_str() + ); + } - operands[i] = BinaryenLoad( + // If arity hits 0, we can call_indirect + expressions.push_back( + BinaryenIf( + module, + BinaryenUnary( // Check if the arity is equal to 0 + module, + BinaryenEqZInt32(), + BinaryenLoad( module, - getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())), - false, // TODO: Support signed values! + 4, // Add 4 to the closure pointer address to get the arity address + false, + 4, 0, - 0, - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), // TODO: fix the hardcoded stuff here - BinaryenConst(module, BinaryenLiteralInt32(closure.getArgPointers().at(i).getAddress())), + BinaryenTypeInt32(), + BinaryenLocalGet( // The local thats storing the pointer to the function we want to call + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), MEMORY_NAME.c_str() - ); - } - - pair fnTypes = getBinaryenTypeForFunctionDeclaration(ref); - - expressions.push_back( - BinaryenCallIndirect( - module, - FN_TABLE_NAME.c_str(), - BinaryenConst(module, BinaryenLiteralInt32(closure.getFunctionPointer().getAddress())), - operands, - closure.getArgPointers().size(), - fnTypes.first, - fnTypes.second ) - ); - } else { - expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(closure.getPointer().getAddress()))); - } - - BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; - for (int i = 0; i < expressions.size(); i++) { - blockExpressions[i] = expressions.at(i); - } - - return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeInt64()); - } - - shared_ptr ref = dynamic_pointer_cast(reference); - string refIdentifier = passedRefIdentifier; + ), + BinaryenCallIndirect( // If the above check is true, execute_indirect + module, + FN_TABLE_NAME.c_str(), + BinaryenLoad( + module, + 4, + false, + 0, + 0, + BinaryenTypeInt32(), + BinaryenLocalGet( + module, + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() + ), + MEMORY_NAME.c_str() + ), + loadArgsExpressions, + BinaryenTypeArity(functionParamType), + functionParamType, + functionReturnType + ), + BinaryenConst(module, BinaryenLiteralInt64(-1)) + ) + ); - if (refIdentifier == "") { - refIdentifier = dynamic_pointer_cast(ref->getIdentifier())->getIdentifier(); + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; + for (int i = 0; i < expressions.size(); i++) { + blockExpressions[i] = expressions.at(i); } + + return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), functionReturnType); + } - cout << "refidentifier is " << refIdentifier << endl; - + BinaryenExpressionRef CodeGen::generateCallIndirectForNewClosure( + shared_ptr funcInvNode, + shared_ptr ref, + string refIdentifier, + BinaryenModuleRef &module + ) { vector expressions; - vector paramMemPointers; - string qualifiedInvName = Compiler::getQualifiedFunctionIdentifier(funcInvIdentifier, funcInvNode); + // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory + // That way, we save a bunch of store and load calls, and can just skip to the call_indirect + vector> paramMemPointers = generateFunctionInvocationArgMemoryInsertions( + funcInvNode, + expressions, + module + ); - auto inScope = scope.lookup(qualifiedInvName); + WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; + WasmClosure closure = WasmClosure::clone(closureTemplate); + closure.addArgs(paramMemPointers); - for (auto arg : funcInvNode->getParameters()->getElements()) { - int byteSize = getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())); - int memLocation = memoryOffset; + vector storageExpressions = generateClosureMemoryStore(closure, module); - paramMemPointers.push_back(memLocation); - - // TODO: instead of incrementing memoryoffset each time, lets change to only increment after the loop, and then - // for each generated store operation just use the offset field instead of 0 - memoryOffset += byteSize; + copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); - expressions.push_back( - BinaryenStore( + // If we're at 0 arity we can go ahead and execute the function call + if (closure.getArity() == 0) { + BinaryenExpressionRef* operands = new BinaryenExpressionRef[closure.getArgPointers().size()]; + + for (int i = 0; i < closure.getArgPointers().size(); i++) { + shared_ptr arg = funcInvNode->getParameters()->getElements().at(i); + + operands[i] = BinaryenLoad( module, - byteSize, + getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())), + false, // TODO: Support signed values! 0, 0, - BinaryenConst(module, BinaryenLiteralInt32(memLocation)), - generate(arg, module), - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), + getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), // TODO: fix the hardcoded stuff here + BinaryenConst(module, BinaryenLiteralInt32(closure.getArgPointers().at(i).getAddress())), MEMORY_NAME.c_str() + ); + } + + pair fnTypes = getBinaryenTypeForFunctionDeclaration( + dynamic_pointer_cast(ref) + ); + + expressions.push_back( + BinaryenCallIndirect( + module, + FN_TABLE_NAME.c_str(), + BinaryenConst(module, BinaryenLiteralInt32(closure.getFunctionPointer().getAddress())), + operands, + closure.getArgPointers().size(), + fnTypes.first, + fnTypes.second ) ); + } else { + expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(closure.getPointer().getAddress()))); } - BinaryenExpressionPrint( - BinaryenLoad( - module, - 4, - false, - 0, - 0, - BinaryenTypeInt32(), - BinaryenLocalGet( // TODO: need to check if is -1 and globalget instead - module, - inScope.value()->getMappedBinaryenIndex(), - BinaryenTypeInt32() - ), - MEMORY_NAME.c_str() - ) - ); - - //functionNameToClosureTemplateMap - - - cout << "teehee" << endl; + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; + for (int i = 0; i < expressions.size(); i++) { + blockExpressions[i] = expressions.at(i); + } + + return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), BinaryenTypeInt64()); } BinaryenExpressionRef CodeGen::generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module) { @@ -1160,7 +1081,8 @@ namespace Theta { if (typeDeclaration->getType() == DataTypes::FUNCTION) return BinaryenTypeInt32(); } - pair CodeGen::getBinaryenTypeForFunctionDeclaration(shared_ptr function) { + pair CodeGen::getBinaryenTypeForFunctionDeclaration(shared_ptr node) { + shared_ptr function = dynamic_pointer_cast(node); int totalParams = function->getParameters()->getElements().size(); BinaryenType* types = new BinaryenType[totalParams]; diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index c80cfe8..6f95bf1 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -39,12 +39,6 @@ namespace Theta { bool addToExports = false ); BinaryenExpressionRef generateFunctionInvocation(shared_ptr node, BinaryenModuleRef &module); - BinaryenExpressionRef generateIndirectInvocation( - shared_ptr node, - shared_ptr reference, - BinaryenModuleRef &module, - string refIdentifier = "" - ); BinaryenExpressionRef generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module); BinaryenExpressionRef generateIdentifier(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateBinaryOperation(shared_ptr node, BinaryenModuleRef &module); @@ -78,6 +72,26 @@ namespace Theta { BinaryenExpressionRef right, BinaryenModuleRef &modul ); + + vector> generateFunctionInvocationArgMemoryInsertions( + shared_ptr funcInvNode, + vector &expressions, + BinaryenModuleRef &module, + string refIdentifier = "" + ); + + BinaryenExpressionRef generateCallIndirectForNewClosure( + shared_ptr funcInvNode, + shared_ptr ref, + string refIdentifier, + BinaryenModuleRef &module + ); + + BinaryenExpressionRef generateCallIndirectForExistingClosure( + shared_ptr funcInvNode, + string refIdentifier, + BinaryenModuleRef &module + ); static BinaryenOp getBinaryenOpFromBinOpNode(shared_ptr node); static BinaryenType getBinaryenTypeFromTypeDeclaration(shared_ptr node); From 6be49c896f8047a8ed71c3699f3db13bb4243dd8 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 25 Aug 2024 00:18:18 -0400 Subject: [PATCH 26/38] remove unused constant --- src/compiler/CodeGen.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 6f95bf1..4f2b428 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -62,7 +62,6 @@ namespace Theta { int memoryOffset = 0; unordered_map functionNameToClosureTemplateMap; string LOCAL_IDX_SCOPE_KEY = "ThetaLang.internal.localIdxCounter"; - string BOOTSTRAP_FUNC_NAME = "ThetaLang.bootstrap"; BinaryenModuleRef initializeWasmModule(); From e8b5de0e0ea2f63e74f4ebdd9f027b604c8e4505 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 25 Aug 2024 18:50:48 -0400 Subject: [PATCH 27/38] curried indirect calls working --- src/compiler/CodeGen.cpp | 129 ++++++++++++++++++++++-------- src/compiler/CodeGen.hpp | 15 +++- src/compiler/FunctionMetaData.hpp | 30 +++++++ src/compiler/TypeChecker.cpp | 8 +- src/compiler/TypeChecker.hpp | 2 +- 5 files changed, 143 insertions(+), 41 deletions(-) create mode 100644 src/compiler/FunctionMetaData.hpp diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 81dcbbc..10b8437 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -570,11 +570,9 @@ namespace Theta { // this is a reference to a function and must have a closure already // in memory if (foundLocalReference.value()->getNodeType() == ASTNode::FUNCTION_INVOCATION || funcInvName != scopeLookupIdentifier) { - return generateCallIndirectForExistingClosure(funcInvNode, scopeLookupIdentifier, module); + return generateCallIndirectForExistingClosure(funcInvNode, foundLocalReference.value(), scopeLookupIdentifier, module); } - cout << "indirect for " << scopeLookupIdentifier << endl; - return generateCallIndirectForNewClosure(funcInvNode, foundLocalReference.value(), scopeLookupIdentifier, module); } @@ -639,6 +637,7 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateCallIndirectForExistingClosure( shared_ptr funcInvNode, + shared_ptr reference, string refIdentifier, BinaryenModuleRef &module ) { @@ -646,29 +645,27 @@ namespace Theta { generateFunctionInvocationArgMemoryInsertions(funcInvNode, expressions, module, refIdentifier); - cout << "in here for " << refIdentifier << endl; - // TODO: The referenced function isn't necessarily always already generated by Binaryen. If we haven't generated // the reference before calling it here, this will bomb out. Thats why the current build is failing. The below lines // need to be changed to figure out the arity, types, etc strictly from the funcInvNode data (and potentially a reference node, if needed) + + FunctionMetaData functionMetaData = (reference->getNodeType() == ASTNode::FUNCTION_INVOCATION + ? getDerivedFunctionMetaData(funcInvNode, dynamic_pointer_cast(reference)) + : getFunctionMetaData(dynamic_pointer_cast(reference)) + ); - BinaryenFunctionRef functionToExecute = BinaryenGetFunction(module, refIdentifier.c_str()); - BinaryenType functionReturnType = BinaryenFunctionGetResults(functionToExecute); - BinaryenType functionParamType = BinaryenFunctionGetParams(functionToExecute); - int functionArity = BinaryenTypeArity(functionParamType); + BinaryenExpressionRef* loadArgsExpressions = new BinaryenExpressionRef[functionMetaData.getArity()]; - BinaryenType* types; - BinaryenTypeExpand(functionParamType, types); - BinaryenExpressionRef* loadArgsExpressions = new BinaryenExpressionRef[functionArity]; + for (int i = 0; i < functionMetaData.getArity(); i++) { + BinaryenType argType = functionMetaData.getParams()[i]; - for (int i = 0; i < functionArity; i++) { loadArgsExpressions[i] = BinaryenLoad( module, - getByteSizeForType(types[i]), + getByteSizeForType(argType), false, // TODO: support negative values 0, 0, - types[i], + argType, BinaryenLoad( // Loads the arg pointer module, 4, @@ -727,9 +724,9 @@ namespace Theta { MEMORY_NAME.c_str() ), loadArgsExpressions, - BinaryenTypeArity(functionParamType), - functionParamType, - functionReturnType + functionMetaData.getArity(), + functionMetaData.getParamType(), + functionMetaData.getReturnType() ), BinaryenConst(module, BinaryenLiteralInt64(-1)) ) @@ -740,7 +737,7 @@ namespace Theta { blockExpressions[i] = expressions.at(i); } - return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), functionReturnType); + return BinaryenBlock(module, NULL, blockExpressions, expressions.size(), functionMetaData.getReturnType()); } BinaryenExpressionRef CodeGen::generateCallIndirectForNewClosure( @@ -786,7 +783,7 @@ namespace Theta { ); } - pair fnTypes = getBinaryenTypeForFunctionDeclaration( + FunctionMetaData functionMetaData = getFunctionMetaData( dynamic_pointer_cast(ref) ); @@ -797,8 +794,8 @@ namespace Theta { BinaryenConst(module, BinaryenLiteralInt32(closure.getFunctionPointer().getAddress())), operands, closure.getArgPointers().size(), - fnTypes.first, - fnTypes.second + functionMetaData.getParamType(), + functionMetaData.getReturnType() ) ); } else { @@ -858,11 +855,9 @@ namespace Theta { BinaryenExpressionRef CodeGen::generateIdentifier(shared_ptr identNode, BinaryenModuleRef &module) { string identName = identNode->getIdentifier(); - cout << "generating " << identName << endl; optional scopeRef = scopeReferences.lookup(identName); if (scopeRef) { - cout << identName << " references " << scopeRef.value() << endl; identName = scopeRef.value(); } @@ -875,8 +870,6 @@ namespace Theta { : identInScope->getValue() ); - cout << "resolvedtype of " << identName << " is " << type->toJSON() << endl; - if (identInScope->getMappedBinaryenIndex() == -1) { return BinaryenGlobalGet( module, @@ -1081,22 +1074,88 @@ namespace Theta { if (typeDeclaration->getType() == DataTypes::FUNCTION) return BinaryenTypeInt32(); } - pair CodeGen::getBinaryenTypeForFunctionDeclaration(shared_ptr node) { - shared_ptr function = dynamic_pointer_cast(node); - int totalParams = function->getParameters()->getElements().size(); + template + FunctionMetaData CodeGen::getFunctionMetaData(shared_ptr functionNode) { + int totalParams = functionNode->getParameters()->getElements().size(); + + BinaryenType* paramTypes = new BinaryenType[totalParams]; - BinaryenType* types = new BinaryenType[totalParams]; for (int i = 0; i < totalParams; i++) { - types[i] = getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(function->getParameters()->getElements().at(i)->getValue())); + paramTypes[i] = getBinaryenTypeFromTypeDeclaration( + dynamic_pointer_cast( + functionNode->getNodeType() == ASTNode::FUNCTION_INVOCATION + ? functionNode->getParameters()->getElements().at(i)->getResolvedType() + : functionNode->getParameters()->getElements().at(i)->getValue() + ) + ); + } + + BinaryenType returnType = getBinaryenTypeFromTypeDeclaration(TypeChecker::getFunctionReturnType(functionNode)); + + return FunctionMetaData( + totalParams, + paramTypes, + returnType + ); + } + + // This is basically getting the function metadata for the function that gets generated as a result of + // currying + // TODO: This wont work if the function that is curried doesnt use all of the parameters of its parent + // function, because the generated function will have less arity than what we are inferring it + // to have here. Need to change closure generation to just make every closure capture all the + // parameters of its parents + FunctionMetaData CodeGen::getDerivedFunctionMetaData( + shared_ptr invocation, + shared_ptr reference + ) { + FunctionMetaData invocationMeta = getFunctionMetaData(invocation); + FunctionMetaData referenceMeta = getFunctionMetaData(reference); + + int totalParams = invocationMeta.getArity() + referenceMeta.getArity(); + + BinaryenType* combinedParamTypes = new BinaryenType[totalParams]; + + copy(referenceMeta.getParams(), referenceMeta.getParams() + referenceMeta.getArity(), combinedParamTypes); + copy( + invocationMeta.getParams(), + invocationMeta.getParams() + invocationMeta.getArity(), + combinedParamTypes + referenceMeta.getArity() + ); + + return FunctionMetaData( + totalParams, + combinedParamTypes, + invocationMeta.getReturnType() + ); + } + + FunctionMetaData CodeGen::getFunctionMetaDataFromTypeDeclaration(shared_ptr type) { + int totalParams = (type->getValue() + ? 0 + : type->getElements().size() - 1 + ); + + BinaryenType* paramTypes = new BinaryenType[totalParams]; + + for (int i = 0; i < totalParams; i++) { + paramTypes[i] = getBinaryenTypeFromTypeDeclaration( + dynamic_pointer_cast(type->getElements().at(i)) + ); } - - BinaryenType paramType = BinaryenTypeCreate(types, totalParams); BinaryenType returnType = getBinaryenTypeFromTypeDeclaration( - dynamic_pointer_cast(TypeChecker::getFunctionReturnType(function)) + dynamic_pointer_cast(type->getValue() + ? type->getValue() + : type->getElements().back() + ) ); - return make_pair(paramType, returnType); + return FunctionMetaData( + totalParams, + paramTypes, + returnType + ); } void CodeGen::hoistCapsuleElements(vector> elements) { diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 4f2b428..5441d15 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -17,6 +17,7 @@ #include "parser/ast/TypeDeclarationNode.hpp" #include "parser/ast/FunctionInvocationNode.hpp" #include "parser/ast/ControlFlowNode.hpp" +#include "compiler/FunctionMetaData.hpp" #include #include #include @@ -88,15 +89,25 @@ namespace Theta { BinaryenExpressionRef generateCallIndirectForExistingClosure( shared_ptr funcInvNode, + shared_ptr ref, string refIdentifier, BinaryenModuleRef &module ); static BinaryenOp getBinaryenOpFromBinOpNode(shared_ptr node); static BinaryenType getBinaryenTypeFromTypeDeclaration(shared_ptr node); - static pair getBinaryenTypeForFunctionDeclaration(shared_ptr node); - void hoistCapsuleElements(vector> elements); + template + static FunctionMetaData getFunctionMetaData(shared_ptr node); + + static FunctionMetaData getFunctionMetaDataFromTypeDeclaration(shared_ptr type); + + static FunctionMetaData getDerivedFunctionMetaData( + shared_ptr inv, + shared_ptr ref + ); + + void hoistCapsuleElements(vector> ielements); void bindIdentifierToScope(shared_ptr ast); void registerModuleFunctions(BinaryenModuleRef &module); diff --git a/src/compiler/FunctionMetaData.hpp b/src/compiler/FunctionMetaData.hpp new file mode 100644 index 0000000..30f859b --- /dev/null +++ b/src/compiler/FunctionMetaData.hpp @@ -0,0 +1,30 @@ +#pragma once + +#include "binaryen-c.h" + +namespace Theta { +class FunctionMetaData { + public: + FunctionMetaData( + int totalParams, + BinaryenType* paramTypes, + BinaryenType resultType + ) : arity(totalParams), params(paramTypes), returnType(resultType) { + paramType = BinaryenTypeCreate(params, arity); + }; + + int getArity() { return arity; } + + BinaryenType* getParams() { return params; } + + BinaryenType getParamType() { return paramType; } + + BinaryenType getReturnType() { return returnType; } + + private: + int arity; + BinaryenType* params; + BinaryenType paramType; + BinaryenType returnType; +}; +} diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index 9af4b76..d0e86da 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -868,9 +868,11 @@ namespace Theta { return nullptr; } - shared_ptr TypeChecker::getFunctionReturnType(shared_ptr fnDeclNode) { - if (fnDeclNode->getResolvedType()->getValue()) return dynamic_pointer_cast(fnDeclNode->getResolvedType()->getValue()); + shared_ptr TypeChecker::getFunctionReturnType(shared_ptr fn) { + if (fn->getNodeType() == ASTNode::FUNCTION_INVOCATION) return dynamic_pointer_cast(fn->getResolvedType()); - return dynamic_pointer_cast(dynamic_pointer_cast(fnDeclNode->getResolvedType())->getElements().back()); + if (fn->getResolvedType()->getValue()) return dynamic_pointer_cast(fn->getResolvedType()->getValue()); + + return dynamic_pointer_cast(dynamic_pointer_cast(fn->getResolvedType())->getElements().back()); } } diff --git a/src/compiler/TypeChecker.hpp b/src/compiler/TypeChecker.hpp index 0585ea0..8b35dd1 100644 --- a/src/compiler/TypeChecker.hpp +++ b/src/compiler/TypeChecker.hpp @@ -61,7 +61,7 @@ namespace Theta { */ static bool isOneOfTypes(shared_ptr type, vector> options); - static shared_ptr getFunctionReturnType(shared_ptr fnDeclNode); + static shared_ptr getFunctionReturnType(shared_ptr fn); private: From a8cc8bd2c9e6e98a369766465c068f985f51ebe9 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 26 Aug 2024 17:12:47 -0400 Subject: [PATCH 28/38] change closure collection to include all parent function parameters --- src/compiler/CodeGen.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 10b8437..9a09a82 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -437,16 +437,20 @@ namespace Theta { shared_ptr parent = dynamic_pointer_cast(node->getParent()); // Go through the parameters backwards so we can preserve their order if we ascend further into the ancestors. This - // will get reversed at the end + // will get reversed at the end. Keeps all parameters of the parent function, even if they aren't used in the + // closure. This is necessary for the codegen that occurs elsewhere, if a function call occurs to a closure before + // that closure's codegen has run. The generator has no way of inferring which params are used, without itself + // first codegenning the closure -- so it infers that a closure will have the parameters of its parent functions. + // That can be improved in the future by changing the codegen flow to have it generate things as we come across them, + // which will save memory during runtime since we wont need to store extra params in memory, but this is good enough for now for (int i = parent->getParameters()->getElements().size() - 1; i >= 0; i--) { shared_ptr ident = dynamic_pointer_cast(parent->getParameters()->getElements().at(i)); - auto identNeeded = identifiersToFind.find(ident->getIdentifier()); - - if (identNeeded == identifiersToFind.end()) continue; - parameters.push_back(ident); - identifiersToFind.erase(ident->getIdentifier()); + + if (identifiersToFind.find(ident->getIdentifier()) != identifiersToFind.end()) { + identifiersToFind.erase(ident->getIdentifier()); + } } } @@ -645,10 +649,6 @@ namespace Theta { generateFunctionInvocationArgMemoryInsertions(funcInvNode, expressions, module, refIdentifier); - // TODO: The referenced function isn't necessarily always already generated by Binaryen. If we haven't generated - // the reference before calling it here, this will bomb out. Thats why the current build is failing. The below lines - // need to be changed to figure out the arity, types, etc strictly from the funcInvNode data (and potentially a reference node, if needed) - FunctionMetaData functionMetaData = (reference->getNodeType() == ASTNode::FUNCTION_INVOCATION ? getDerivedFunctionMetaData(funcInvNode, dynamic_pointer_cast(reference)) : getFunctionMetaData(dynamic_pointer_cast(reference)) From 80c06ce740aac6d85a7e7cea120f29d0723e22f4 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 26 Aug 2024 18:45:59 -0400 Subject: [PATCH 29/38] enhance indirect call generation when all parameters are available immediately --- src/compiler/CodeGen.cpp | 60 +++++++++++++++++----------------------- 1 file changed, 25 insertions(+), 35 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 9a09a82..af5e1be 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -656,10 +656,10 @@ namespace Theta { BinaryenExpressionRef* loadArgsExpressions = new BinaryenExpressionRef[functionMetaData.getArity()]; - for (int i = 0; i < functionMetaData.getArity(); i++) { + for (int i = functionMetaData.getArity() - 1; i >= 0; i--) { BinaryenType argType = functionMetaData.getParams()[i]; - loadArgsExpressions[i] = BinaryenLoad( + loadArgsExpressions[functionMetaData.getArity() - 1 - i] = BinaryenLoad( module, getByteSizeForType(argType), false, // TODO: support negative values @@ -746,41 +746,19 @@ namespace Theta { string refIdentifier, BinaryenModuleRef &module ) { - vector expressions; - - // TODO: This can be improved by checking if the arity will be 0 before adding anything to memory - // That way, we save a bunch of store and load calls, and can just skip to the call_indirect - vector> paramMemPointers = generateFunctionInvocationArgMemoryInsertions( - funcInvNode, - expressions, - module - ); - WasmClosure closureTemplate = functionNameToClosureTemplateMap.find(refIdentifier)->second; - WasmClosure closure = WasmClosure::clone(closureTemplate); - closure.addArgs(paramMemPointers); - - vector storageExpressions = generateClosureMemoryStore(closure, module); - - copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); + vector expressions; // If we're at 0 arity we can go ahead and execute the function call - if (closure.getArity() == 0) { - BinaryenExpressionRef* operands = new BinaryenExpressionRef[closure.getArgPointers().size()]; + if (funcInvNode->getParameters()->getElements().size() == closureTemplate.getArity()) { + BinaryenExpressionRef* operands = new BinaryenExpressionRef[closureTemplate.getArity()]; - for (int i = 0; i < closure.getArgPointers().size(); i++) { - shared_ptr arg = funcInvNode->getParameters()->getElements().at(i); - - operands[i] = BinaryenLoad( - module, - getByteSizeForType(dynamic_pointer_cast(arg->getResolvedType())), - false, // TODO: Support signed values! - 0, - 0, - getBinaryenTypeFromTypeDeclaration(dynamic_pointer_cast(arg->getResolvedType())), // TODO: fix the hardcoded stuff here - BinaryenConst(module, BinaryenLiteralInt32(closure.getArgPointers().at(i).getAddress())), - MEMORY_NAME.c_str() + for (int i = 0; i < closureTemplate.getArity(); i++) { + shared_ptr argType = dynamic_pointer_cast( + funcInvNode->getParameters()->getElements().at(i)->getResolvedType() ); + + operands[i] = generate(funcInvNode->getParameters()->getElements().at(i), module); } FunctionMetaData functionMetaData = getFunctionMetaData( @@ -791,17 +769,29 @@ namespace Theta { BinaryenCallIndirect( module, FN_TABLE_NAME.c_str(), - BinaryenConst(module, BinaryenLiteralInt32(closure.getFunctionPointer().getAddress())), + BinaryenConst(module, BinaryenLiteralInt32(closureTemplate.getFunctionPointer().getAddress())), operands, - closure.getArgPointers().size(), + functionMetaData.getArity(), functionMetaData.getParamType(), functionMetaData.getReturnType() ) ); } else { + WasmClosure closure = WasmClosure::clone(closureTemplate); + vector> paramMemPointers = generateFunctionInvocationArgMemoryInsertions( + funcInvNode, + expressions, + module + ); + + closure.addArgs(paramMemPointers); + + vector storageExpressions = generateClosureMemoryStore(closure, module); + copy(storageExpressions.begin(), storageExpressions.end(), back_inserter(expressions)); + expressions.push_back(BinaryenConst(module, BinaryenLiteralInt32(closure.getPointer().getAddress()))); } - + BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; for (int i = 0; i < expressions.size(); i++) { blockExpressions[i] = expressions.at(i); From d2d5024855425b9b2467a978e97a4ce3ef9c825e Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 27 Aug 2024 23:02:16 -0400 Subject: [PATCH 30/38] fix: unary parsing fixed, also added support for negative values in memory --- src/compiler/CodeGen.cpp | 7 +------ src/parser/Parser.cpp | 2 +- 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index af5e1be..d0e92de 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -297,7 +297,6 @@ namespace Theta { argPointers.push_back(Pointer(memoryOffset)); - // TODO: change to only increment memoryOffset after the loop finishes memoryOffset += byteSize; } @@ -662,7 +661,7 @@ namespace Theta { loadArgsExpressions[functionMetaData.getArity() - 1 - i] = BinaryenLoad( module, getByteSizeForType(argType), - false, // TODO: support negative values + true, // TODO: support negative values 0, 0, argType, @@ -1091,10 +1090,6 @@ namespace Theta { // This is basically getting the function metadata for the function that gets generated as a result of // currying - // TODO: This wont work if the function that is curried doesnt use all of the parameters of its parent - // function, because the generated function will have less arity than what we are inferring it - // to have here. Need to change closure generation to just make every closure capture all the - // parameters of its parents FunctionMetaData CodeGen::getDerivedFunctionMetaData( shared_ptr invocation, shared_ptr reference diff --git a/src/parser/Parser.cpp b/src/parser/Parser.cpp index 296de6d..854a295 100644 --- a/src/parser/Parser.cpp +++ b/src/parser/Parser.cpp @@ -478,7 +478,7 @@ namespace Theta { // Unary cant have a left arg, so if we get one passed in we can skip straight to primary if (!passedLeftArg && (match(Token::OPERATOR, Lexemes::NOT) || match(Token::OPERATOR, Lexemes::MINUS))) { shared_ptr un = make_shared(currentToken.getLexeme(), parent); - un->setValue(parseUnary(passedLeftArg, un)); + un->setValue(parseUnary(un, passedLeftArg)); return un; } From a557f63ed82e9b93e87f7640bbb6e52d925ba266 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 29 Aug 2024 21:53:08 -0400 Subject: [PATCH 31/38] strings almost working in indirect calls --- src/compiler/CodeGen.cpp | 155 +++++++++++++++++++++++++++------------ src/compiler/CodeGen.hpp | 5 +- 2 files changed, 114 insertions(+), 46 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index d0e92de..3d5f1fd 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -63,6 +63,14 @@ namespace Theta { false, MEMORY_NAME.c_str() ); + + BinaryenAddTable( + module, + STRINGREF_TABLE.c_str(), + 1000, + 100000000, + BinaryenTypeStringref() + ); StandardLibrary::registerFunctions(module); @@ -282,18 +290,33 @@ namespace Theta { shared_ptr paramType = dynamic_pointer_cast(param->getValue()); int byteSize = getByteSizeForType(paramType); - expressions.push_back( - BinaryenStore( - module, - byteSize, - 0, - 0, - BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), - generate(paramValue, module), - getBinaryenTypeFromTypeDeclaration(paramType), - MEMORY_NAME.c_str() - ) - ); + + BinaryenExpressionRef generatedValue = generate(paramValue, module); + if (paramType->getType() == DataTypes::STRING) { + expressions.push_back( + BinaryenTableSet( + module, + STRINGREF_TABLE.c_str(), + BinaryenConst(module, BinaryenLiteralInt32(stringRefOffset)), + generatedValue + ) + ); + + stringRefOffset += 1; + } else { + expressions.push_back( + BinaryenStore( + module, + byteSize, + 0, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), + generatedValue, + getBinaryenStorageTypeFromTypeDeclaration(paramType), + MEMORY_NAME.c_str() + ) + ); + } argPointers.push_back(Pointer(memoryOffset)); @@ -593,18 +616,32 @@ namespace Theta { int argByteSize = getByteSizeForType(argType); - expressions.push_back( - BinaryenStore( - module, - argByteSize, - 0, - 0, - BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), - generate(arg, module), - getBinaryenTypeFromTypeDeclaration(argType), - MEMORY_NAME.c_str() - ) - ); + BinaryenExpressionRef generatedValue = generate(arg, module); + if (argType->getType() == DataTypes::STRING) { + expressions.push_back( + BinaryenTableSet( + module, + STRINGREF_TABLE.c_str(), + BinaryenConst(module, BinaryenLiteralInt32(stringRefOffset)), + generatedValue + ) + ); + + stringRefOffset += 1; + } else { + expressions.push_back( + BinaryenStore( + module, + argByteSize, + 0, + 0, + BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), + generatedValue, + getBinaryenStorageTypeFromTypeDeclaration(argType), + MEMORY_NAME.c_str() + ) + ); + } // If a refIdentifier was passed, that means we have an existing closure // in memory that we want to populate. @@ -658,29 +695,54 @@ namespace Theta { for (int i = functionMetaData.getArity() - 1; i >= 0; i--) { BinaryenType argType = functionMetaData.getParams()[i]; - loadArgsExpressions[functionMetaData.getArity() - 1 - i] = BinaryenLoad( + BinaryenExpressionRef loadArgPointerExpr = BinaryenLoad( // Loads the arg pointer module, - getByteSizeForType(argType), - true, // TODO: support negative values - 0, + 4, + false, + 8 + i * 4, 0, - argType, - BinaryenLoad( // Loads the arg pointer + BinaryenTypeInt32(), + BinaryenLocalGet( // The local thats storing the pointer to the closure module, - 4, - false, - 8 + i * 4, - 0, - BinaryenTypeInt32(), - BinaryenLocalGet( // The local thats storing the pointer to the closure - module, - scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), - BinaryenTypeInt32() - ), - MEMORY_NAME.c_str() + scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), + BinaryenTypeInt32() ), MEMORY_NAME.c_str() ); + + BinaryenExpressionRef loadArgExpression; + if (argType == BinaryenTypeStringref()) { + loadArgExpression = BinaryenTableGet( + module, + STRINGREF_TABLE.c_str(), + loadArgPointerExpr, + BinaryenTypeStringref() + ); + } else { + loadArgExpression = BinaryenLoad( + module, + getByteSizeForType(argType), + true, + 0, + 0, + (argType == BinaryenTypeStringref() ? BinaryenTypeInt32() : argType), + loadArgPointerExpr, + MEMORY_NAME.c_str() + ); + } + + loadArgsExpressions[functionMetaData.getArity() - 1 - i] = loadArgExpression; + } + + // In order for if statements to return a value in WASM, both branches must return the same concrete type. + // This is the value that will be returned by the else branch, should the if fail + BinaryenExpressionRef defaultReturnValue; + if (functionMetaData.getReturnType() == BinaryenTypeInt32()) { + defaultReturnValue = BinaryenConst(module, BinaryenLiteralInt32(-1)); + } else if (functionMetaData.getReturnType() == BinaryenTypeInt64()) { + defaultReturnValue = BinaryenConst(module, BinaryenLiteralInt64(-1)); + } else { + defaultReturnValue = BinaryenStringConst(module, ""); } // If arity hits 0, we can call_indirect @@ -727,7 +789,7 @@ namespace Theta { functionMetaData.getParamType(), functionMetaData.getReturnType() ), - BinaryenConst(module, BinaryenLiteralInt64(-1)) + defaultReturnValue ) ); @@ -1063,6 +1125,12 @@ namespace Theta { if (typeDeclaration->getType() == DataTypes::FUNCTION) return BinaryenTypeInt32(); } + BinaryenType CodeGen::getBinaryenStorageTypeFromTypeDeclaration(shared_ptr typeDeclaration) { + if (typeDeclaration->getType() == DataTypes::STRING) return BinaryenTypeInt32(); + + return getBinaryenTypeFromTypeDeclaration(typeDeclaration); + } + template FunctionMetaData CodeGen::getFunctionMetaData(shared_ptr functionNode) { int totalParams = functionNode->getParameters()->getElements().size(); @@ -1195,9 +1263,6 @@ namespace Theta { int CodeGen::getByteSizeForType(shared_ptr type) { if (type->getType() == DataTypes::NUMBER) return 8; if (type->getType() == DataTypes::BOOLEAN) return 4; - // TODO: Figure out if this holds true. According to - // https://github.com/WebAssembly/stringref/blob/main/proposals/stringref/Overview.md#the-stringref-facility - // stringrefs are either i32 or i64 if (type->getType() == DataTypes::STRING) return 4; cout << "Not implemented for type: " << type->getType() << endl; diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index 5441d15..aa92e1b 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -58,9 +58,11 @@ namespace Theta { private: SymbolTableStack> scope; SymbolTableStack scopeReferences; - string FN_TABLE_NAME = "0"; + string FN_TABLE_NAME = "ThetaFunctionRefs"; + string STRINGREF_TABLE = "ThetaStringRefs"; string MEMORY_NAME = "0"; int memoryOffset = 0; + int stringRefOffset = 0; unordered_map functionNameToClosureTemplateMap; string LOCAL_IDX_SCOPE_KEY = "ThetaLang.internal.localIdxCounter"; @@ -96,6 +98,7 @@ namespace Theta { static BinaryenOp getBinaryenOpFromBinOpNode(shared_ptr node); static BinaryenType getBinaryenTypeFromTypeDeclaration(shared_ptr node); + static BinaryenType getBinaryenStorageTypeFromTypeDeclaration(shared_ptr node); template static FunctionMetaData getFunctionMetaData(shared_ptr node); From 4a2a0da4acbfae09d3df351d274cf2fbe55172d2 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 31 Aug 2024 19:59:12 -0400 Subject: [PATCH 32/38] indirect function calls working with strings! --- src/compiler/CodeGen.cpp | 41 +++++++++++++++++++++----------------- src/compiler/CodeGen.hpp | 2 +- src/wasm/ThetaLangCore.wat | 1 + 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 3d5f1fd..8f8ca4d 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -289,8 +289,6 @@ namespace Theta { shared_ptr paramValue = scope.lookup(paramName).value(); shared_ptr paramType = dynamic_pointer_cast(param->getValue()); - int byteSize = getByteSizeForType(paramType); - BinaryenExpressionRef generatedValue = generate(paramValue, module); if (paramType->getType() == DataTypes::STRING) { expressions.push_back( @@ -301,9 +299,13 @@ namespace Theta { generatedValue ) ); + + argPointers.push_back(Pointer(stringRefOffset)); stringRefOffset += 1; } else { + int byteSize = getByteSizeForType(paramType); + expressions.push_back( BinaryenStore( module, @@ -316,11 +318,11 @@ namespace Theta { MEMORY_NAME.c_str() ) ); - } - - argPointers.push_back(Pointer(memoryOffset)); + + argPointers.push_back(Pointer(memoryOffset)); - memoryOffset += byteSize; + memoryOffset += byteSize; + } } WasmClosure closure = WasmClosure( @@ -614,28 +616,36 @@ namespace Theta { for (shared_ptr arg : funcInvNode->getParameters()->getElements()) { shared_ptr argType = dynamic_pointer_cast(arg->getResolvedType()); - int argByteSize = getByteSizeForType(argType); BinaryenExpressionRef generatedValue = generate(arg, module); + Pointer addressToPopulate; if (argType->getType() == DataTypes::STRING) { + addressToPopulate = Pointer(stringRefOffset); + + stringRefOffset += 1; + expressions.push_back( BinaryenTableSet( module, STRINGREF_TABLE.c_str(), - BinaryenConst(module, BinaryenLiteralInt32(stringRefOffset)), + BinaryenConst(module, BinaryenLiteralInt32(addressToPopulate.getAddress())), generatedValue ) ); - stringRefOffset += 1; } else { + addressToPopulate = Pointer(memoryOffset); + int argByteSize = getByteSizeForType(argType); + + memoryOffset += argByteSize; + expressions.push_back( BinaryenStore( module, argByteSize, 0, 0, - BinaryenConst(module, BinaryenLiteralInt32(memoryOffset)), + BinaryenConst(module, BinaryenLiteralInt32(addressToPopulate.getAddress())), generatedValue, getBinaryenStorageTypeFromTypeDeclaration(argType), MEMORY_NAME.c_str() @@ -643,6 +653,8 @@ namespace Theta { ); } + paramMemPointers.push_back(addressToPopulate); + // If a refIdentifier was passed, that means we have an existing closure // in memory that we want to populate. if (refIdentifier != "") { @@ -656,20 +668,13 @@ namespace Theta { scope.lookup(refIdentifier).value()->getMappedBinaryenIndex(), BinaryenTypeInt32() ), - BinaryenConst( - module, - BinaryenLiteralInt32(memoryOffset) - ) + BinaryenConst(module, BinaryenLiteralInt32(addressToPopulate.getAddress())) }, 2, BinaryenTypeNone() ) ); } - - paramMemPointers.push_back(Pointer(memoryOffset)); - - memoryOffset += argByteSize; } return paramMemPointers; diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index aa92e1b..c06c2dd 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -62,7 +62,7 @@ namespace Theta { string STRINGREF_TABLE = "ThetaStringRefs"; string MEMORY_NAME = "0"; int memoryOffset = 0; - int stringRefOffset = 0; + int stringRefOffset = 1; unordered_map functionNameToClosureTemplateMap; string LOCAL_IDX_SCOPE_KEY = "ThetaLang.internal.localIdxCounter"; diff --git a/src/wasm/ThetaLangCore.wat b/src/wasm/ThetaLangCore.wat index 64ac97d..022b767 100644 --- a/src/wasm/ThetaLangCore.wat +++ b/src/wasm/ThetaLangCore.wat @@ -1,4 +1,5 @@ (module + (import "console" "log" (func $log (param stringref))) ;; TODO: Remove this (memory $0 1 10) (func $Theta.Function.populateClosure (param $closure_mem_addr i32) (param $param_addr i32) (local $arity i32) (local.set $arity ;; Load the closure arity From 718fb3cd6842c1d1782fcadb21073a342309d505 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 2 Sep 2024 01:28:54 -0400 Subject: [PATCH 33/38] fix parser test --- test/ParserTest.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/test/ParserTest.cpp b/test/ParserTest.cpp index 6851042..2b339d9 100644 --- a/test/ParserTest.cpp +++ b/test/ParserTest.cpp @@ -898,11 +898,13 @@ TEST_CASE("Parser") { REQUIRE(typeNode->getNodeType() == ASTNode::TYPE_DECLARATION); REQUIRE(typeNode->getType() == "Tuple"); - shared_ptr leftTypeNode = dynamic_pointer_cast(typeNode->getLeft()); + + + shared_ptr leftTypeNode = dynamic_pointer_cast(typeNode->getElements().at(0)); REQUIRE(leftTypeNode != nullptr); REQUIRE(leftTypeNode->getType() == "Symbol"); - shared_ptr rightTypeNode = dynamic_pointer_cast(typeNode->getRight()); + shared_ptr rightTypeNode = dynamic_pointer_cast(typeNode->getElements().at(1)); REQUIRE(rightTypeNode != nullptr); REQUIRE(rightTypeNode->getType() == "String"); From 43061717157f80b733d1c6cebe36953385f41dbe Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 2 Sep 2024 22:52:18 -0400 Subject: [PATCH 34/38] fix: fix typechecking for variadics vs function defs. also fixes all tests --- src/compiler/TypeChecker.cpp | 30 ++++++++++++++++++++++-------- test/TypeCheckerTest.cpp | 28 ++++++++++++++++------------ 2 files changed, 38 insertions(+), 20 deletions(-) diff --git a/src/compiler/TypeChecker.cpp b/src/compiler/TypeChecker.cpp index d0e86da..9961162 100644 --- a/src/compiler/TypeChecker.cpp +++ b/src/compiler/TypeChecker.cpp @@ -137,7 +137,7 @@ namespace Theta { bool TypeChecker::checkAssignmentNode(shared_ptr node) { bool typesMatch = isSameType(node->getLeft()->getValue(), node->getRight()->getResolvedType()); - + shared_ptr ident = dynamic_pointer_cast(node->getLeft()); if (!typesMatch) { @@ -485,8 +485,10 @@ namespace Theta { shared_ptr type = make_shared(DataTypes::TUPLE, node); - type->setLeft(node->getLeft()->getResolvedType()); - type->setRight(node->getRight()->getResolvedType()); + type->setElements({ + node->getLeft()->getResolvedType(), + node->getRight()->getResolvedType() + }); node->setResolvedType(type); @@ -769,15 +771,27 @@ namespace Theta { if (!containsType) return false; } } else if (t1->hasMany() && t2->hasMany()) { - for (int i = 0; i < t2->getElements().size(); i++) { - bool containsType = false; + if (t1->getType() == DataTypes::VARIADIC) { + // In Variadic types, it makes sense that the types on the right can be any of the values defined + // on the left, in any order + for (int i = 0; i < t2->getElements().size(); i++) { + bool containsType = false; + + for (int j = 0; j < t1->getElements().size(); j++) { + if (isSameType(t2->getElements().at(i), t1->getElements().at(j))) containsType = true; + } - for (int j = 0; j < t1->getElements().size(); j++) { - if (isSameType(t2->getElements().at(i), t1->getElements().at(j))) containsType = true; + if (!containsType) return false; } + } else { + // In every other case, though, order matters. The left and right types must be exactly the same + if (t1->getElements().size() != t2->getElements().size()) return false; - if (!containsType) return false; + for (int i = 0; i < t1->getElements().size(); i++) { + if (!isSameType(t1->getElements().at(i), t2->getElements().at(i))) return false; + } } + } return t1->getType() == t2->getType(); diff --git a/test/TypeCheckerTest.cpp b/test/TypeCheckerTest.cpp index 571c0b3..7d289cb 100644 --- a/test/TypeCheckerTest.cpp +++ b/test/TypeCheckerTest.cpp @@ -223,7 +223,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { SECTION("Can typecheck function assignents with parameters correctly") { shared_ptr ast = setup(R"( capsule Test { - x> = (a) -> a + x> = (a) -> a } )"); @@ -246,6 +246,10 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { bool isValid = typeChecker.checkAST(ast); + for (auto ex : Compiler::getInstance().getEncounteredExceptions()) { + ex->display(); + } + REQUIRE(isValid); REQUIRE(Compiler::getInstance().getEncounteredExceptions().size() == 0); } @@ -270,7 +274,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { SECTION("Can typecheck curried functions correctly") { shared_ptr ast = setup(R"( capsule Test { - x>> = (a) -> (b) -> a + b + x>> = (a) -> (b) -> a + b } )"); @@ -289,7 +293,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { :HIGH } - severityIdent> = (sev) -> { + severityIdent> = (sev) -> { sev } } @@ -304,7 +308,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { SECTION("Can typecheck control flow correctly") { shared_ptr ast = setup(R"( capsule Test { - isEven> = (num) -> { + isEven> = (num) -> { if (num % 2 == 0) { return true } else if (num % 3 == 0) { @@ -325,7 +329,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { SECTION("Throws if control flow condition is not a boolean") { shared_ptr ast = setup(R"( capsule Test { - isLongString> = (str) -> { + isLongString> = (str) -> { if (str) { return true } @@ -342,7 +346,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { SECTION("Can typecheck recursive functions correctly") { shared_ptr ast = setup(R"( capsule Test { - addUntilTen> = (sum) -> { + addUntilTen> = (sum) -> { if (sum == 10) { return sum } @@ -406,9 +410,9 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { SECTION("Can typecheck function overloads correctly") { shared_ptr ast = setup(R"( capsule Test { - add> = (x) -> x - add> = (x, y) -> x + y - add> = (x, y) -> x + add> = (x) -> x + add> = (x, y) -> x + y + add> = (x, y) -> x doTheThing> = () -> { add(1) @@ -501,7 +505,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { y } - addPoints> = (p1, p2) -> { + addPoints> = (p1, p2) -> { @Point { x: 4, y: 2 @@ -524,7 +528,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { y } - addPoints> = (p1, p2) -> { + addPoints> = (p1, p2) -> { @Point { x: 4 } @@ -608,7 +612,7 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { capsule Test { z = 5 - add>> = (x) -> (y) -> x + y + z + add>> = (x) -> (y) -> x + y + z } )"); From 4a2004221cfb5e5bf4507e8a0487cc476f3d3ea9 Mon Sep 17 00:00:00 2001 From: Alex Date: Mon, 2 Sep 2024 22:53:35 -0400 Subject: [PATCH 35/38] remove unused code --- test/TypeCheckerTest.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/TypeCheckerTest.cpp b/test/TypeCheckerTest.cpp index 7d289cb..1f84211 100644 --- a/test/TypeCheckerTest.cpp +++ b/test/TypeCheckerTest.cpp @@ -246,10 +246,6 @@ TEST_CASE_METHOD(TypeCheckerTest, "TypeChecker") { bool isValid = typeChecker.checkAST(ast); - for (auto ex : Compiler::getInstance().getEncounteredExceptions()) { - ex->display(); - } - REQUIRE(isValid); REQUIRE(Compiler::getInstance().getEncounteredExceptions().size() == 0); } From d89e3d4af002b8663942149823c26e6540f15941 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 3 Sep 2024 19:55:39 -0400 Subject: [PATCH 36/38] fix: returning a value out of a function should not result in a bad optional access; --- src/compiler/CodeGen.cpp | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 8f8ca4d..69c5720 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -177,8 +177,12 @@ namespace Theta { assignmentRhs->setMappedBinaryenIndex(idxOfAssignment); string identName = assignmentIdentifier; - if (assignmentNode->getRight()->getNodeType() == ASTNode::FUNCTION_INVOCATION) { - identName = Compiler::getQualifiedFunctionIdentifier(identName, assignmentNode->getRight()->getResolvedType()); + shared_ptr rhsResolvedType = dynamic_pointer_cast(assignmentNode->getRight()->getResolvedType()); + + // If this is an assignment to the result of a function call, and the function call returns another function, we need to + // use the qualified name instead + if (assignmentNode->getRight()->getNodeType() == ASTNode::FUNCTION_INVOCATION && rhsResolvedType->getType() == DataTypes::FUNCTION) { + identName = Compiler::getQualifiedFunctionIdentifier(identName, rhsResolvedType); } scope.insert(identName, assignmentRhs); @@ -917,6 +921,8 @@ namespace Theta { identName = scopeRef.value(); } + cout << "Here looking up " << identName << endl; + shared_ptr identInScope = scope.lookup(identName).value(); // The ident in this case may refer to a parameter to a function, which may not have a resolvedType From 36c3ddc1544632bf8068c89930ba026911b9ee22 Mon Sep 17 00:00:00 2001 From: Alex Date: Tue, 3 Sep 2024 20:46:34 -0400 Subject: [PATCH 37/38] fix: correctly return value when an assignment is the last expression in a block --- src/compiler/CodeGen.cpp | 39 +++++++++++++++++++++++++++++---------- src/compiler/CodeGen.hpp | 2 ++ 2 files changed, 31 insertions(+), 10 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index 69c5720..b5d135d 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -166,6 +166,8 @@ namespace Theta { currentIdentIdx->setLiteralValue(to_string(idxOfAssignment + 1)); scope.insert(LOCAL_IDX_SCOPE_KEY, currentIdentIdx); + bool isLastInBlock = checkIsLastInBlock(assignmentNode); + // Function declarations dont get generated generically like the rest of the AST elements, they are not part of the "generate" method, // because they behave differently depending on where the function was declared. A function declared at the top level of capsule will // be hoisted and will have no inherent scope bound to it. @@ -186,12 +188,17 @@ namespace Theta { } scope.insert(identName, assignmentRhs); + + // If the last thing in the block is an assignment, we dont need to actually do the assignment at all, + // just return the value + if (isLastInBlock) return generate(assignmentRhs, module); return BinaryenLocalSet( module, idxOfAssignment, generate(assignmentRhs, module) ); + } shared_ptr originalDeclaration = dynamic_pointer_cast(assignmentNode->getRight()); @@ -241,18 +248,24 @@ namespace Theta { scopeReferences.insert(assignmentIdentifier, globalQualifiedFunctionName); vector expressions = storage.second; + + BinaryenExpressionRef addressRefExpression = BinaryenConst( + module, + BinaryenLiteralInt32(storage.first.getPointer().getAddress()) + ); // Returns a reference to the closure memory address - expressions.push_back( - BinaryenLocalSet( - module, - idxOfAssignment, - BinaryenConst( + if (isLastInBlock) { + expressions.push_back(addressRefExpression); + } else { + expressions.push_back( + BinaryenLocalSet( module, - BinaryenLiteralInt32(storage.first.getPointer().getAddress()) + idxOfAssignment, + addressRefExpression ) - ) - ); + ); + } BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; for (int i = 0; i < expressions.size(); i++) { @@ -921,8 +934,6 @@ namespace Theta { identName = scopeRef.value(); } - cout << "Here looking up " << identName << endl; - shared_ptr identInScope = scope.lookup(identName).value(); // The ident in this case may refer to a parameter to a function, which may not have a resolvedType @@ -1271,6 +1282,14 @@ namespace Theta { ); } + bool CodeGen::checkIsLastInBlock(shared_ptr node) { + if (node->getParent() == nullptr) return false; + if (!node->getParent()->hasMany()) return false; + + + return node->getId() == dynamic_pointer_cast(node->getParent())->getElements().back()->getId(); + } + int CodeGen::getByteSizeForType(shared_ptr type) { if (type->getType() == DataTypes::NUMBER) return 8; if (type->getType() == DataTypes::BOOLEAN) return 4; diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index c06c2dd..e66922f 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -114,6 +114,8 @@ namespace Theta { void bindIdentifierToScope(shared_ptr ast); void registerModuleFunctions(BinaryenModuleRef &module); + bool checkIsLastInBlock(shared_ptr node); + pair> generateAndStoreClosure( string qualifiedReferenceFunctionName, shared_ptr simplifiedReference, From b2e42925eb47cc59cb12447b8499819310f27624 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 5 Sep 2024 01:20:36 -0400 Subject: [PATCH 38/38] fix: direct curried functions should execute properly, not only when part of an assignment --- src/compiler/CodeGen.cpp | 83 ++++++++++++++++++++++++---------------- src/compiler/CodeGen.hpp | 6 +++ 2 files changed, 55 insertions(+), 34 deletions(-) diff --git a/src/compiler/CodeGen.cpp b/src/compiler/CodeGen.cpp index b5d135d..851bc84 100644 --- a/src/compiler/CodeGen.cpp +++ b/src/compiler/CodeGen.cpp @@ -96,7 +96,7 @@ namespace Theta { } else if (node->getNodeType() == ASTNode::FUNCTION_DECLARATION) { // The only time we should get here is if we have a function defined inside a function, // because the normal function declaration flow goes through the generateAssignment flow - simplifyNestedFunctionDeclaration(dynamic_pointer_cast(node), module); + return generateClosureFunctionDeclaration(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::FUNCTION_INVOCATION) { return generateFunctionInvocation(dynamic_pointer_cast(node), module); } else if (node->getNodeType() == ASTNode::CONTROL_FLOW) { @@ -201,12 +201,29 @@ namespace Theta { } - shared_ptr originalDeclaration = dynamic_pointer_cast(assignmentNode->getRight()); + return generateClosureFunctionDeclaration( + dynamic_pointer_cast(assignmentNode->getRight()), + module, + [module, idxOfAssignment, isLastInBlock](const BinaryenExpressionRef &addressRefExpression) { + if (isLastInBlock) return addressRefExpression; - shared_ptr simplifiedDeclaration = simplifyNestedFunctionDeclaration( - originalDeclaration, - module + return BinaryenLocalSet( + module, + idxOfAssignment, + addressRefExpression + ); + }, + make_pair(assignmentIdentifier, idxOfAssignment) ); + } + + BinaryenExpressionRef CodeGen::generateClosureFunctionDeclaration( + shared_ptr function, + BinaryenModuleRef &module, + std::function returnValueFormatter, + optional> assignmentIdentifierPair + ) { + shared_ptr simplifiedDeclaration = simplifyNestedFunctionDeclaration(function, module); // Generating a unique hash for this function is necessary because it will be stored on the module globally, // so we need to make sure there are no naming collisions @@ -223,30 +240,34 @@ namespace Theta { simplifiedDeclaration ); + // If an assignmentIdentifier was passed in, this function is being assigned to a variable. + // We need to add some items to the scope to make the function available elsewhere + if (assignmentIdentifierPair) { + simplifiedDeclaration->setMappedBinaryenIndex(assignmentIdentifierPair->second); + + string localQualifiedFunctionName = Compiler::getQualifiedFunctionIdentifier( + assignmentIdentifierPair->first, + function + ); + + // Assign it in scope to the lhs identifier so we can always look it up later when it is referenced. This + // way the caller does not need to know the global function name in order to call it + scope.insert(globalQualifiedFunctionName, simplifiedDeclaration); + scopeReferences.insert(localQualifiedFunctionName, globalQualifiedFunctionName); + + // Also insert the assignment identifier into scope referenecs so that if we want to return a reference to the function + // using the identifier, we can do that. This will overwrite any previous scope references with that identifier, so only + // the most recent identifier of a given name can be returned as a reference + scopeReferences.insert(assignmentIdentifierPair->first, globalQualifiedFunctionName); + } + pair> storage = generateAndStoreClosure( globalQualifiedFunctionName, simplifiedDeclaration, - originalDeclaration, + function, module ); - simplifiedDeclaration->setMappedBinaryenIndex(idxOfAssignment); - - string localQualifiedFunctionName = Compiler::getQualifiedFunctionIdentifier( - assignmentIdentifier, - originalDeclaration - ); - - // Assign it in scope to the lhs identifier so we can always look it up later when it is referenced. This - // way the caller does not need to know the global function name in order to call it - scope.insert(globalQualifiedFunctionName, simplifiedDeclaration); - scopeReferences.insert(localQualifiedFunctionName, globalQualifiedFunctionName); - - // Also insert the assignment identifier into scope referenecs so that if we want to return a reference to the function - // using the identifier, we can do that. This will overwrite any previous scope references with that identifier, so only - // the most recent identifier of a given name can be returned as a reference - scopeReferences.insert(assignmentIdentifier, globalQualifiedFunctionName); - vector expressions = storage.second; BinaryenExpressionRef addressRefExpression = BinaryenConst( @@ -254,18 +275,10 @@ namespace Theta { BinaryenLiteralInt32(storage.first.getPointer().getAddress()) ); + BinaryenExpressionRef returnedValueExpression = returnValueFormatter(addressRefExpression); + // Returns a reference to the closure memory address - if (isLastInBlock) { - expressions.push_back(addressRefExpression); - } else { - expressions.push_back( - BinaryenLocalSet( - module, - idxOfAssignment, - addressRefExpression - ) - ); - } + expressions.push_back(returnedValueExpression); BinaryenExpressionRef* blockExpressions = new BinaryenExpressionRef[expressions.size()]; for (int i = 0; i < expressions.size(); i++) { @@ -437,6 +450,8 @@ namespace Theta { exit(1); } + + cout << "created simplified function declaration" << endl; return simplifiedDeclaration; } diff --git a/src/compiler/CodeGen.hpp b/src/compiler/CodeGen.hpp index e66922f..a814cae 100644 --- a/src/compiler/CodeGen.hpp +++ b/src/compiler/CodeGen.hpp @@ -39,6 +39,12 @@ namespace Theta { BinaryenModuleRef &module, bool addToExports = false ); + BinaryenExpressionRef generateClosureFunctionDeclaration( + shared_ptr node, + BinaryenModuleRef &module, + std::function returnValueFormatter = [](const BinaryenExpressionRef &addrExpr) { return addrExpr; }, + optional> assignmentIdentifierPair = nullopt + ); BinaryenExpressionRef generateFunctionInvocation(shared_ptr node, BinaryenModuleRef &module); BinaryenExpressionRef generateControlFlow(shared_ptr controlFlowNode, BinaryenModuleRef &module); BinaryenExpressionRef generateIdentifier(shared_ptr node, BinaryenModuleRef &module);