From a4bf1ce78dfe32bdcf121d15363da82ba5d31424 Mon Sep 17 00:00:00 2001
From: ibx34
Date: Fri, 3 Jan 2025 09:13:49 -0500
Subject: [PATCH] work on v2

---
 v2/boot.py     |  24 ++++++
 v2/exprs.py    | 170 +++++++++++++++++++++++++++++++++++++++++++
 v2/lex.py      | 191 +++++++++++++++++++++++++++++++++++++++++++++++++
 v2/mkast.py    | 138 +++++++++++++++++++++++++++++++++++
 v2/shared.py   |  81 +++++++++++++++++++++
 v2/test.v2.dal |  18 +++++
 6 files changed, 622 insertions(+)
 create mode 100644 v2/boot.py
 create mode 100644 v2/exprs.py
 create mode 100644 v2/lex.py
 create mode 100644 v2/mkast.py
 create mode 100644 v2/shared.py
 create mode 100644 v2/test.v2.dal

diff --git a/v2/boot.py b/v2/boot.py
new file mode 100644
index 0000000..7bd19d5
--- /dev/null
+++ b/v2/boot.py
@@ -0,0 +1,24 @@
+from lex import Lexer
+from mkast import Parser
+
+def run():
+    """Lex and parse ``test.v2.dal`` from the current working directory."""
+    # The sample program contains non-ASCII tokens such as "→", so decode it
+    # explicitly as UTF-8; the context manager closes the handle after reading.
+    with open("test.v2.dal", encoding="utf-8") as source_file:
+        file = source_file.read()
+    lexer = Lexer(file)
+    lexer.lex_all()
+    print(f"{lexer.results}\n\n")
+    parser = Parser(lexer.results)
+    parser.parse_all()
+
+    # print(f"{parser.results}\n\n")
+    # code_generator = ASM(parser.results, parser.symbol_tables)
+    # code_generator.generate_all()
+    # print(f"{code_generator.lines}")
+    # open("boot.s", "w+").write("\n".join(code_generator.lines))
+
+
+if __name__ == "__main__":
+    run()
diff --git a/v2/exprs.py b/v2/exprs.py
new file mode 100644
index 0000000..cd00b12
--- /dev/null
+++ b/v2/exprs.py
@@ -0,0 +1,170 @@
+from abc import ABC
+from enum import Enum, auto
+from typing import ForwardRef, Union
+
+from shared import PrimitiveTypes
+
+
+class TaggedType(ABC):
+    """Marker base: only TaggedType expressions may be built with ``ty=None``."""
+    pass
+
+
+class AstirExpr(ABC):
+    """Base class of all expressions; ``ty`` holds the expression's type."""
+
+    def __init__(self, ty: Union["TypeInstance", None]) -> None:
+        if not isinstance(self, TaggedType) and ty is None:
+            raise Exception(
+                "Only expressions tagged as types may pass None to type parameter"
+            )
+        self.ty = ty
+
+    def _type(self) -> Union["TypeInstance", None]:
+        return self.ty
+
+
+class Symbol:
+    def __init__(
+        self, name: str, inside: AstirExpr, belongs_to_table: int, id: int
+    ) -> None:
+        super().__init__()
+        self.name = name
+        self.inside = inside
+        self.belongs_to_table = belongs_to_table
+        self.id = id
+
+    def __repr__(self) -> str:
+        return f'Symbol "{self.name}", value = {self.inside}. Belongs to = {self.belongs_to_table}. ID = {self.id}'
+
+
+class SymbolTable:
+    """Symbols of one scope, keyed by numeric id and indexed by name."""
+
+    def __init__(self, id: int, parent: int | None = None) -> None:
+        self.symbols: dict[int, Symbol] = {}
+        self.name_to_symbol: dict[str, int] = {}
+        self.last_id = 0
+        self.id = id
+        self.parent = parent
+
+    def lookup(self, to_find: str) -> Symbol | None:
+        if (
+            to_find not in self.name_to_symbol
+            or self.name_to_symbol[to_find] not in self.symbols
+        ):
+            return None
+        return self.symbols[self.name_to_symbol[to_find]]
+
+    def insert(self, name: str, val: AstirExpr) -> Symbol:
+        self.name_to_symbol[name] = self.last_id
+        symbol = Symbol(name, val, self.id, self.last_id)
+        self.symbols[self.last_id] = symbol
+        self.last_id += 1
+        return symbol
+
+    def next_id(self) -> int:
+        return self.last_id + 1
+
+    def __repr__(self) -> str:
+        return f"{self.symbols}"
+
+
+class Identifier(AstirExpr, TaggedType):
+    def __init__(self, value: str):
+        super().__init__(None)
+        self.value = value
+
+    def __repr__(self):
+        return f"Identifier(Value={self.value})"
+
+
+class DataTypeDefinition(AstirExpr, TaggedType):
+    def __init__(
+        self,
+        name: str,
+        elements: AstirExpr | list[AstirExpr] | None,
+        # E.g. d'List a :: ...
+        # generic_params: [Identifier(value=a)]
+        generic_params: list[Identifier],
+    ):
+        super().__init__(None)
+        self.name: str = name
+        self.elements: AstirExpr | list[AstirExpr] | None = elements
+        self.generic_params = generic_params
+
+    def __repr__(self):
+        return f"DataTypeDefinition(Name={self.name}, Inside={self.elements}, Generics={self.generic_params})"
+
+
+class Unit(AstirExpr):
+    def __init__(self):
+        super().__init__(PrimitiveTypes.UNIT)
+
+    def __repr__(self):
+        return "Unit"
+
+
+class LambdaDefinition(AstirExpr, TaggedType):
+    def __init__(
+        self,
+        name: str,
+        parameter_types: list["TypeInstance"],
+        generic_params: list[Identifier],
+    ):
+        super().__init__(None)
+        self.name: str = name
+        self.parameter_types: list["TypeInstance"] = parameter_types
+        self.generic_params = generic_params
+
+
+class Lambda(AstirExpr):
+    def __init__(
+        self, resolved_ty: AstirExpr, ty: "TypeInstance", parameters: SymbolTable
+    ):
+        super().__init__(ty)
+        if self.ty is None or not isinstance(resolved_ty, LambdaDefinition):
+            raise Exception("Expected LambdaDefinition type for Lambda.")
+        self.name: str = resolved_ty.name
+        self.parameters: SymbolTable = parameters
+
+
+class TypeInstance(AstirExpr, TaggedType):
+    def __init__(
+        self,
+        resolved_type: DataTypeDefinition | PrimitiveTypes,
+        # E.g. \whatever List Int :: ...
+        filled_in_generics: list["TypeInstance"] | None = None,
+    ):
+        super().__init__(self)
+        self.resolved_type: DataTypeDefinition | PrimitiveTypes = resolved_type
+        # Build a fresh list per instance: a literal ``[]`` default would be
+        # shared by every TypeInstance created without explicit generics.
+        self.filled_in_generics: list["TypeInstance"] = (
+            [] if filled_in_generics is None else filled_in_generics
+        )
+
+
+class Dummy(AstirExpr):
+    def __init__(self):
+        super().__init__(PrimitiveTypes.UNIT)
+
+class Parenthesized(AstirExpr, TaggedType):
+    def __init__(self, values: list[AstirExpr]):
+        super().__init__(None)
+        self.values = values
+# class Type:
+#     def __init__(self, inside: PrimitiveTypes | AstirExpr):
+#         if isinstance(inside, AstirExpr) and not isinstance(inside, TaggedType):
+#             raise Exception(
+#                 "Only expressions tagged as types are allowed to be passed into type class."
+#             )
+#         self.inside = inside
+
+
+# if __name__ == "__main__":
+#     symbol_table = SymbolTable()
+#     ident = Lambda(
+#         Type(LambdaDefinition("test_lambda", [Type(PrimitiveTypes.INT)])), symbol_table
+#     )
+#     print(ident._type())
diff --git a/v2/lex.py b/v2/lex.py
new file mode 100644
index 0000000..d1e20fd
--- /dev/null
+++ b/v2/lex.py
@@ -0,0 +1,191 @@
+from typing import Callable
+from shared import Cursor, PrimitiveTypes, TT, operators
+
+# Every token spelling declared in TT. ``str in TT`` raises TypeError before
+# Python 3.12, so membership is tested against this plain set of values.
+TT_VALUES = frozenset(member.value for member in TT)
+
+class Token:
+    def __init__(self, ty: TT, prim_ty: PrimitiveTypes | None = None, val=None) -> None:
+        self.ty = ty
+        self.val = val
+        self.prim_ty = prim_ty
+
+    def __repr__(self) -> str:
+        return f"{self.ty} ({self.val})"
+
+
+def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None:
+    if (
+        possible_op is None
+        or possible_op.ty is None
+        or possible_op.ty not in [TT.PLUS, TT.DASH]
+    ):
+        return None
+    op = operators[possible_op.ty.value]
+    return (possible_op.ty.value, op)
+
+
+def is_valid_ident(c: str) -> bool:
+    return c.isalnum() or c == "_"
+
+
+class Lexer(Cursor):
+    def __init__(self, input: str) -> None:
+        super().__init__(list(input))
+        self.results: list[Token] = []
+
+    def lex_all(self) -> None:
+        """Lex the whole input into ``self.results``, skipping whitespace and comments."""
+        while c := self.current():
+            if c.isspace():
+                self.advance()
+                continue
+            lexed = self.lex()
+            if lexed.ty == TT.COMMENT:
+                self.advance()
+                continue
+            self.results.append(lexed)
+            self.advance()
+
+    def collect_until(
+        self,
+        check: Callable[[str | None, str], bool],
+        devance_b4_break: bool = False,
+        escape: bool = False,
+        start_str: str = "",
+    ) -> str:
+        """Collect characters until ``check(char, collected)`` is true or input ends."""
+        temp_str: str = start_str
+        while True:
+            c = self.current()
+            if c is None or check(c, temp_str):
+                if devance_b4_break:
+                    self.at -= 1
+                break
+            self.advance()
+            temp_str += c
+        return temp_str
+
+    def lex(self) -> Token:
+        """Lex and return the single token that starts at the cursor."""
+        c = self.current()
+        if c is None:
+            raise Exception("Ran out of input")
+        elif c == "/":
+            if self.at + 1 < len(self.input) and self.input[self.at + 1] == "/":
+                self.at += 2
+                self.collect_until(lambda a, _: a == "\n", False)
+                return Token(TT.COMMENT)
+        elif (
+            c == ":"
+            and self.at + 1 < len(self.input)
+            and self.input[self.at + 1] == ":"
+        ):
+            self.advance()
+            return Token(TT.DOUBLE_COLON)
+        elif c == '"':
+            self.advance()
+            string = ""
+            while c := self.current():
+                if c is None or c == '"':
+                    break
+                elif c == "\\":
+                    self.advance()
+                    c = self.current()
+                    if c is None:
+                        break
+                    elif c == '"':
+                        string += '"'
+                        self.advance()
+                        continue
+                self.advance()
+                string += c
+            return Token(TT.LITERAL, prim_ty=PrimitiveTypes.STR, val=string)
+        elif c not in TT_VALUES and (is_valid_ident(c) or c == "."):
+            self.advance()
+            if (following := self.current()) and following == "'" and c in ["t", "d", "m"]:
+                # self.advance()
+                return Token(TT.PRIME_FORM, val=c)
+
+            def identifier_check(c: str | None, rest: str) -> bool:
+                if (c is None) or (not is_valid_ident(c)) and c != ".":
+                    return True
+                return False
+
+            ident = self.collect_until(identifier_check, True, start_str=c)
+
+            if "." in ident:
+                try:
+                    number = float(ident)
+                    sign = 0 if number >= 0 else 1
+
+                    number = abs(number)
+                    integer = int(number)
+                    fractional = number - integer
+                    integer_bin = (
+                        bin(integer).replace("0b", "") if integer != 0 else "0"
+                    )
+
+                    frac_bin: list[str] = (
+                        []
+                    )  # List to store the fractional binary digits
+                    while (
+                        fractional and len(frac_bin) < 23 + 3
+                    ):  # Stop after 23+3 bits to avoid overflow
+                        fractional *= 2  # Multiply by 2 to shift digits left
+                        bit = int(fractional)  # Extract the integer part (0 or 1)
+                        frac_bin.append(str(bit))  # Append the bit to the list
+                        fractional -= (
+                            bit  # Remove the integer part from the fractional value
+                        )
+                    frac_bin2: str = "".join(frac_bin)
+                    combined_bin = integer_bin + "." + frac_bin2
+
+                    if (
+                        "1" in combined_bin
+                    ):  # Ensure there is at least one significant bit
+                        first_one = combined_bin.index(
+                            "1"
+                        )  # Find the position of the first '1'
+                        if "." in combined_bin and first_one > combined_bin.index("."):
+                            first_one -= (
+                                1  # Adjust for the position of the binary point
+                            )
+                        exponent = (
+                            len(integer_bin) - 1 - first_one
+                        )  # Calculate the exponent from normalization
+                        mantissa = (integer_bin + frac_bin2)[
+                            first_one + 1 : first_one + 24
+                        ]  # Extract mantissa bits
+                    else:  # Special case for zero-like numbers
+                        # IEEE 754 encodes zero with an all-zero exponent field,
+                        # so start at -127 to cancel the bias added below.
+                        exponent = -127
+                        mantissa = "0" * 23  # Mantissa is all zeros
+
+                    # Step 4: Encode the exponent (add bias of 127)
+                    exponent += 127  # Apply the bias to the exponent
+                    exponent_bin = (
+                        bin(exponent).replace("0b", "").zfill(8)
+                    )  # Convert to 8-bit binary
+
+                    # Step 5: Pad the mantissa to 23 bits
+                    mantissa = mantissa.ljust(
+                        23, "0"
+                    )  # Ensure the mantissa has exactly 23 bits
+
+                    # Combine the components into a 32-bit IEEE 754 representation
+                    ieee754 = f"{sign}{exponent_bin}{mantissa}"
+                    return Token(TT.LITERAL, val=ieee754, prim_ty=PrimitiveTypes.FLOAT)
+                except ValueError:
+                    raise Exception(
+                        f'Something went wrong handling decimal: "{ident}"? check how many dots...'
+                    )
+            # TODO: TEMPORARY!!
+            elif ident.isdigit():
+                return Token(TT.LITERAL, val=int(ident), prim_ty=PrimitiveTypes.INT)
+            return Token(TT.IDENT, val=ident)
+        else:
+            return Token(TT(c))
+        return Token(TT.DUMMY)
\ No newline at end of file
diff --git a/v2/mkast.py b/v2/mkast.py
new file mode 100644
index 0000000..b448fc0
--- /dev/null
+++ b/v2/mkast.py
@@ -0,0 +1,138 @@
+from shared import TT, PrimitiveTypes
+from shared import Cursor
+from exprs import (
+    AstirExpr,
+    DataTypeDefinition,
+    Identifier,
+    Parenthesized,
+    SymbolTable,
+    Symbol,
+    Dummy,
+    TypeInstance,
+    Unit,
+)
+from lex import Token
+
+
+class Lexer(Cursor):
+    pass
+
+
+class Parser(Cursor):
+    def __init__(self, input: list[Token]):
+        super().__init__(input)
+        global_symbols: SymbolTable = SymbolTable(0)
+        global_symbols.insert("Str", DataTypeDefinition("Str", None, []))
+
+        self.all_symbol_tables: dict[int, SymbolTable] = {0: global_symbols}
+        self.current_symbol_table: int = 0
+
+    def lookup(self, name: str, symbol_table_id: int | None = None) -> Symbol | None:
+        """Find ``name`` in the given (default: current) table, then in its parents."""
+        symbol_table_id = (
+            self.current_symbol_table if symbol_table_id is None else symbol_table_id
+        )
+        if symbol_table_id not in self.all_symbol_tables:
+            return None
+        selected_symbol_table = self.all_symbol_tables[symbol_table_id]
+        lookup = selected_symbol_table.lookup(name)
+        if (
+            lookup is None or name not in selected_symbol_table.name_to_symbol
+        ) and selected_symbol_table.parent is not None:
+            return self.lookup(name, selected_symbol_table.parent)
+        else:
+            return lookup
+
+    def parse_all(self):
+        while self.at < len(self.input):
+            parsed = self.parse()
+            if parsed is None:
+                raise Exception("CANNOT PARSE NONE!")
+            print(f"{parsed}")
+
+    def parse(self) -> AstirExpr | None:
+        """Parse one expression at the cursor; None means end of input or no matching rule."""
+        current = self.current()
+        result: AstirExpr | None = None
+        if current is None:
+            return None
+        elif current.ty == TT.IDENT and current.val is not None:
+            self.advance()
+            ident_lookup = self.lookup(current.val)
+            if ident_lookup is not None:
+                print(ident_lookup)
+            result = Identifier(current.val)
+        elif current.ty == TT.OPEN_PAREN:
+            self.advance()
+            # if (current := self.current()) and current is not None:
+            inside_of_parens: list[AstirExpr] = []
+            while (
+                (current := self.current())
+                and current is not None
+                and current.ty != TT.CLOSE_PAREN
+            ):
+                element = self.parse()
+                if element is None:
+                    raise Exception("Did not expect None value as element to () type")
+                inside_of_parens.append(element)
+            self.advance()
+            if len(inside_of_parens) == 0:
+                result = Unit()
+            else:
+                result = Parenthesized(inside_of_parens)
+        elif current.ty == TT.PRIME_FORM and current.val is not None:
+            self.advance()
+            if current.val == "d":
+                type_definition_name = self.parse()
+                if not isinstance(type_definition_name, Identifier):
+                    raise Exception(
+                        "Expected an identifier as the name for a type defintion."
+                    )
+                possible_generics: list[Identifier] = []
+                while (
+                    (current := self.current())
+                    and current is not None
+                    and current.ty != TT.DOUBLE_COLON
+                ):
+                    parsed_generic = self.parse()
+                    if not isinstance(parsed_generic, (Identifier)):
+                        raise Exception(
+                            "Unexpected expression in generic parameter list."
+                        )
+                    possible_generics.append(parsed_generic)
+                self.advance()
+                # if current.ty == TT.PIPE:
+                #     self.advance()
+                #     continue
+                elements: list[AstirExpr] = []
+                val_stack: list[AstirExpr] = []
+                while (
+                    (current := self.current())
+                    and current is not None
+                    and current.ty != TT.DOUBLE_COLON
+                ):
+                    if current.ty == TT.PIPE:
+                        if not val_stack:
+                            raise Exception(
+                                "Expected at least one value before '|' in data type definition"
+                            )
+                        if len(val_stack) > 1:
+                            elements.append(Parenthesized(val_stack))
+                        else:
+                            elements.append(val_stack[0])
+                        val_stack = []
+                        self.advance()
+                        continue
+                    value = self.parse()
+                    if not isinstance(value, (Unit, Identifier, TypeInstance, Dummy, Parenthesized)):
+                        raise Exception(
+                            f"Unexpected value: {value} in data type definition"
+                        )
+                    val_stack.append(value)
+                elements.append(Parenthesized(val_stack))
+                val_stack=[]
+                result = DataTypeDefinition(
+                    type_definition_name.value, elements, possible_generics
+                )
+
+        return result
diff --git a/v2/shared.py b/v2/shared.py
new file mode 100644
index 0000000..973920d
--- /dev/null
+++ b/v2/shared.py
@@ -0,0 +1,81 @@
+from abc import ABC
+from enum import Enum, auto
+from typing import Generic, TypeVar
+
+
+class PrimitiveTypes(Enum):
+    STR = "Str"
+    INT = "Int"
+    FLOAT = "Float"
+    UNIT = "()"
+    LIST = "List"
+
+
+class TT(Enum):
+    COLON = ":"
+    COMMA = ","
+    BACKSLASH = "\\"
+    FUNCTION_ARROW = "→"
+    PLUS = "+"
+    # minus!
+    DASH = "-"
+    DOUBLE_COLON = "::"
+    OPEN_PAREN = "("
+    CLOSE_PAREN = ")"
+    OPEN_SQUARE = "["
+    UNDERSCORE = "_"
+    GREATER_THAN = ">"
+    LESS_THAN = "<"
+    CURLY_OPEN = "{"
+    CURLY_CLOSE = "}"
+    EQ = "="
+    PIPE = "|"
+    CLOSE_SQUARE = "]"
+    IDENT = "IDENT"
+    LITERAL = "LITERAL"
+    COMMENT = "COMMENT"
+    PRIME_FORM = "PRIME_FORM"
+    DUMMY = "DUMMY"
+
+
+SYMBOLS = [TT.EQ, TT.LESS_THAN, TT.GREATER_THAN, TT.DASH, TT.FUNCTION_ARROW]
+
+operators = {
+    "+": {
+        "precedence": 1,
+        # 0 = Left, 1 = Right, 2 = None
+        "associativity": 0,
+    },
+    "-": {
+        "precedence": 1,
+        # 0 = Left, 1 = Right, 2 = None
+        "associativity": 0,
+    },
+}
+
+T = TypeVar("T")
+
+
+class Cursor(ABC, Generic[T]):
+    """Position-tracking reader over a list; ``at`` is the current index."""
+
+    def __init__(self, input: list[T]) -> None:
+        super().__init__()
+        self.input = input
+        self.at = 0
+
+    def peek(self) -> T | None:
+        """Return the element after the cursor, or None if there is none."""
+        upcoming = self.at + 1
+        if upcoming >= len(self.input):
+            return None
+        return self.input[upcoming]
+
+    def advance(self) -> None:
+        self.at += 1
+
+    def current(self) -> T | None:
+        """Return the element at the cursor, or None once past the end."""
+        if self.at >= len(self.input):
+            return None
+        return self.input[self.at]
diff --git a/v2/test.v2.dal b/v2/test.v2.dal
new file mode 100644
index 0000000..e3ae041
--- /dev/null
+++ b/v2/test.v2.dal
@@ -0,0 +1,18 @@
+// d'List a :: Nil | Cons a (List a)
+d'DummyExpr :: Identifier Str | ExprWithAnotherInside Str DummyExpr | StrLiteral Str
+
+lambda_that_returns_dummy_identifier\ :: DummyExpr → Identifier "hello_world"
+lambda_that_returns_expr_with_another_expression\ :: DummyExpr → ExprWithAnotherInside "hello_world" StrLiteral "Hello, World!"
+// Examples of lambda that takes a custom data type and pattern matches over it.
+// To pattern match we define a lambda definition, then fill in the body definitions
+// with pattern matched parameters
+lambda_that_pattern_matches\DummyExpr :: ()
+lambda_that_pattern_matches\Identifier inside_str → ()
+lambda_that_pattern_matches\ExprWithAnotherInside name inside_expr → ()
+lambda_that_pattern_matches\StrLiteral literal_str → ()
+
+// This adds two numbers, x and y, and returns the sum of x and y
+lambda_definition_alone\int, int :: int
+
+lambda_definition_with_body_all_in_one\x int, y int :: int → x + y
+lambda_definition_alone\x, y → x + y
\ No newline at end of file