From 6b6d62714191854b60ffcc9b10ba9911a197a5d2 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Fri, 13 Dec 2024 09:20:19 -0500 Subject: [PATCH 01/15] start generating & learning about aarch64 asm --- README.md | 4 ++++ asm.py | 12 +++++++++++ ast.py | 1 - boot.dal | 7 ++----- boot.py | 7 ++++++- simple.dal | 1 + whatever | Bin 0 -> 16904 bytes whatever.s | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 simple.dal create mode 100755 whatever create mode 100644 whatever.s diff --git a/README.md b/README.md index 748bbac..8c6f539 100644 --- a/README.md +++ b/README.md @@ -8,3 +8,7 @@ ### New(ish?) ideas? - `m'` and `t'` are "prime forms". open issue for naming ideas or concerns! More on these later:tm: + + +justin@Justins-MBP-2 Cylindropuntia % as -o whatever.o whatever.s +justin@Justins-MBP-2 Cylindropuntia % ld -macosx_version_min 13.0.0 -o whatever whatever.o -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` -e _start -arch arm64 \ No newline at end of file diff --git a/asm.py b/asm.py index e69de29..75eea65 100644 --- a/asm.py +++ b/asm.py @@ -0,0 +1,12 @@ +from ast import Cursor, Expr, Token # type: ignore + + +class ASM(Cursor): + def __init__(self, input: list[Expr]) -> None: + super().__init__(input) + self.lines: list[str] = [".p2align 3"] + + def generate(self) -> None: + c_expr = self.current() + print(f"!! {c_expr}") + return None \ No newline at end of file diff --git a/ast.py b/ast.py index 1bdacd6..d0503ae 100644 --- a/ast.py +++ b/ast.py @@ -536,7 +536,6 @@ def parse(self) -> Expr | None: parts.append(part) - print(f"Handling prime form!! {ident} = {parts}") result = CustomDataType(ident, parts) elif c.ty == TT.LITERAL: if c.prim_ty is None or c.val is None: diff --git a/boot.dal b/boot.dal index ff3bdc2..b0af35c 100644 --- a/boot.dal +++ b/boot.dal @@ -1,8 +1,5 @@ -// Enums are super simple! -d'Option :: Some(a) | Empty - -return_float\ x int :: float → Some(2.32) +return_float\ x int :: Float → 2.32 // Who is whatever and why are we doing them...? -do_whatever\ :: (float, int) → (3.32, 54) \ No newline at end of file +do_whatever\ :: (float, int) → (3.32, 54) \ No newline at end of file diff --git a/boot.py b/boot.py index 771f1c7..6a5412f 100644 --- a/boot.py +++ b/boot.py @@ -1,5 +1,7 @@ from ast import Parser, Lexer # type: ignore +from asm import ASM # type: ignore + def run(): file = open("boot.dal").read() @@ -8,7 +10,10 @@ def run(): print(f"{lexer.results}\n\n") parser = Parser(lexer.results) parser.parse_all() - print(f"{parser.results}") + print(f"{parser.results}\n\n") + code_generator = ASM(parser.results) + code_generator.generate() + print(f"{code_generator.lines}") if __name__ == "__main__": diff --git a/simple.dal b/simple.dal new file mode 100644 index 0000000..89a81b1 --- /dev/null +++ b/simple.dal @@ -0,0 +1 @@ +return_int\ :: int → 23 \ No newline at end of file diff --git a/whatever b/whatever new file mode 100755 index 0000000000000000000000000000000000000000..6da6517087f0f6aca8bf6bbf631bd26bfa03dc70 GIT binary patch literal 16904 zcmeI4OGs2<6vw}LrP*i_nVJR@l$nT11qm%2rO8LWP^+n+%S`v$430CQS#9J)Wh*pD-`9Khf7kE$J-RYte$tr z&k0>SFilRikx5ci>5gh+O~>bpm1vLIs&64}Tiz~&UBt{%k~+d&!LTd^!_5&a{9fNx zJ)c)UMB1puoW~e1^Qx|_-S633X}r~~Lq=3hkBNFRVqoUDONbbK3*u0Wuv6E?&_&R6 zECYTL+KxDmNc+~n>)~Xy$8g&7FP4HsE;xQ8qOhaAtuPey7s@>$8Ja8$PIY`f^UiT& zx;Xz%#zNj>F|%q1=3H>?P01J~t7G~WwuGbhJVf<NfC(@GCcp%k025#WOn?b60Vco% zm;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(^x|CK;&u1Lh@(1QH5LJ5T; zu0xTwki0pmJQ9(OG0;$pa<-1h9H}RodZyio6LQ|H>E*SIr;OW#$OLux&pqa(g_M< zHMd3{J?u;hmkgH;-m{gyl|N*+e5u*uewk%++^?Fv-ZysB8+owMJY|1AJKJKd^E!U^ zPV}DYe=#{waW;SSZ|g(pjkUfjUp-aVhDR21cV|AkaCm0$;Pip?oP3ikv{BLRF{%sc Date: Fri, 13 Dec 2024 09:36:37 -0500 Subject: [PATCH 02/15] .{ --- asm.py | 8 +++++++- boot.dal | 6 ++++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/asm.py b/asm.py index 75eea65..8ea2dad 100644 --- a/asm.py +++ b/asm.py @@ -1,4 +1,4 @@ -from ast import Cursor, Expr, Token # type: ignore +from ast import Assignment, Cursor, Expr, Identifier, Lambda, Token # type: ignore class ASM(Cursor): @@ -8,5 +8,11 @@ def __init__(self, input: list[Expr]) -> None: def generate(self) -> None: c_expr = self.current() + if isinstance(c_expr, Assignment) and isinstance(c_expr.left, Identifier): + name = c_expr.left.value + if isinstance(c_expr.right, Lambda): + print(f"Handling lambda assignment {c_expr.left}.") + self.lines.append(f"{name}:") + return None print(f"!! {c_expr}") return None \ No newline at end of file diff --git a/boot.dal b/boot.dal index b0af35c..ed480c7 100644 --- a/boot.dal +++ b/boot.dal @@ -1,5 +1,7 @@ -return_float\ x int :: Float → 2.32 +// return_float\ x int :: float → 2.32 +return_int\ x int :: int → 2 // Who is whatever and why are we doing them...? -do_whatever\ :: (float, int) → (3.32, 54) \ No newline at end of file +// do_whatever\ :: (float, int) → (3.32, 54) +main\ :: int → return_int(2) \ No newline at end of file From 2a6f26aae51ac966821c02988c9e3fb06564de14 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 14 Dec 2024 09:31:40 -0500 Subject: [PATCH 03/15] updates ... updates ... updates --- asm.py | 30 +- ast_1.py | 805 +++++++++++++++++++++++++++++++++++++++++++++++++++++ boot.dal | 4 +- boot.py | 10 +- types.py | 22 ++ whatever | Bin 16904 -> 0 bytes whatever.s | 27 +- 7 files changed, 868 insertions(+), 30 deletions(-) create mode 100644 ast_1.py create mode 100644 types.py delete mode 100755 whatever diff --git a/asm.py b/asm.py index 8ea2dad..d52eb95 100644 --- a/asm.py +++ b/asm.py @@ -1,18 +1,18 @@ -from ast import Assignment, Cursor, Expr, Identifier, Lambda, Token # type: ignore +# from ast import Assignment, Cursor, Expr, Identifier, Lambda, Token # type: ignore -class ASM(Cursor): - def __init__(self, input: list[Expr]) -> None: - super().__init__(input) - self.lines: list[str] = [".p2align 3"] +# class ASM(Cursor): +# def __init__(self, input: list[Expr]) -> None: +# super().__init__(input) +# self.lines: list[str] = [".p2align 3"] - def generate(self) -> None: - c_expr = self.current() - if isinstance(c_expr, Assignment) and isinstance(c_expr.left, Identifier): - name = c_expr.left.value - if isinstance(c_expr.right, Lambda): - print(f"Handling lambda assignment {c_expr.left}.") - self.lines.append(f"{name}:") - return None - print(f"!! {c_expr}") - return None \ No newline at end of file +# def generate(self) -> None: +# c_expr = self.current() +# if isinstance(c_expr, Assignment) and isinstance(c_expr.left, Identifier): +# name = c_expr.left.value +# if isinstance(c_expr.right, Lambda): +# print(f"Handling lambda assignment {c_expr.left}.") +# self.lines.append(f"{name}:") +# return None +# print(f"!! {c_expr}") +# return None \ No newline at end of file diff --git a/ast_1.py b/ast_1.py new file mode 100644 index 0000000..8fe74e5 --- /dev/null +++ b/ast_1.py @@ -0,0 +1,805 @@ +from abc import ABC, abstractmethod +from os import read +import re +from enum import Enum, auto +from typing import Generic, Callable, Self, Type, TypeVar + + +class PrimitiveTypes(Enum): + INT = auto() + STR = auto() + FLOAT = auto() + UNIT = auto() + + +class TT(Enum): + COLON = ":" + COMMA = "," + BACKSLASH = "\\" + FUNCTION_ARROW = "→" + PLUS = "+" + # minus! + DASH = "-" + DOUBLE_COLON = "::" + OPEN_PAREN = "(" + CLOSE_PAREN = ")" + OPEN_SQUARE = "[" + PIPE = "|" + CLOSE_SQUARE = "]" + IDENT = "IDENT" + LITERAL = "LITERAL" + COMMENT = "COMMENT" + PRIME_FORM = "PRIME_FORM" + + +operators = { + "+": { + "precedence": 1, + # 0 = Left, 1 = Right, 2 = None + "associativity": 0, + }, + "-": { + "precedence": 1, + # 0 = Left, 1 = Right, 2 = None + "associativity": 0, + }, +} + + +class Token: + def __init__(self, ty: TT | None, prim_ty: PrimitiveTypes = None, val=None) -> None: + if ty == None: + raise Exception("Token type was none...") + self.ty = ty + self.val = val + self.prim_ty = prim_ty + + def __repr__(self) -> str: + return f"{self.ty} ({self.val})" + + +def is_valid_ident(c: str) -> bool: + return c.isalnum() or c == "_" + + +T = TypeVar("T") + + +# https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal +# simple colors for now...just need help to see my eyes suck +class bcolors: + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + + +class Cursor(ABC, Generic[T]): + def __init__(self, input: list[T]) -> None: + super().__init__() + self.input = input + self.at = 0 + + def advance(self) -> None: + self.at += 1 + + def current(self) -> T | None: + if self.at >= len(self.input): + return None + return self.input[self.at] + + +class Lexer(Cursor): + def __init__(self, input: str) -> None: + super().__init__(list(input)) + self.results: list[Token] = [] + + def lex_all(self) -> None: + while c := self.current(): + if c == " " or c == "\n": + self.advance() + continue + elif c == None: + break + lexed = self.lex() + if lexed.ty == TT.COMMENT: + self.advance() + continue + self.results.append(lexed) + self.advance() + + def collect_until( + self, + check: Callable[[str | None, str], bool], + devance_b4_break: bool = False, + start_str: str = "", + ) -> str: + temp_str = start_str + while True: + c = self.current() + if check(c, temp_str): + if devance_b4_break: + self.at -= 1 + break + self.advance() + temp_str += c + return temp_str + + def lex(self) -> Token: + c = self.current() + if c is None: + raise Exception("Ran out of input") + elif c == "/": + if self.at + 1 < len(self.input) and self.input[self.at + 1] == "/": + self.at += 2 + self.collect_until(lambda a, _: a == "\n", False) + return Token(TT.COMMENT) + elif c == ":" and self.input[self.at + 1] == ":": + self.advance() + return Token(TT.DOUBLE_COLON) + elif c == '"': + self.advance() + string = self.collect_until(lambda c, _: (c is None) or c == '"') + return Token(TT.LITERAL, prim_ty=PrimitiveTypes.STR, val=string) + elif c not in TT and (is_valid_ident(c) or c == "."): + self.advance() + if (next := self.current()) and next == "'" and c == "d": + # self.advance() + return Token(TT.PRIME_FORM) + + def identifier_check(c: str | None, rest: str) -> bool: + if (c is None) or (not is_valid_ident(c)) and c != ".": + return True + return False + + ident = self.collect_until(identifier_check, True, start_str=c) + + if "." in ident: + try: + number = float(ident) + sign = 0 if number >= 0 else 1 + + number = abs(number) + integer = int(number) + fractional = number - integer + integer_bin = ( + bin(integer).replace("0b", "") if integer != 0 else "0" + ) + + frac_bin = [] # List to store the fractional binary digits + while ( + fractional and len(frac_bin) < 23 + 3 + ): # Stop after 23+3 bits to avoid overflow + fractional *= 2 # Multiply by 2 to shift digits left + bit = int(fractional) # Extract the integer part (0 or 1) + frac_bin.append(str(bit)) # Append the bit to the list + fractional -= ( + bit # Remove the integer part from the fractional value + ) + frac_bin = "".join(frac_bin) + combined_bin = integer_bin + "." + frac_bin + + if ( + "1" in combined_bin + ): # Ensure there is at least one significant bit + first_one = combined_bin.index( + "1" + ) # Find the position of the first '1' + if "." in combined_bin and first_one > combined_bin.index("."): + first_one -= ( + 1 # Adjust for the position of the binary point + ) + exponent = ( + len(integer_bin) - 1 - first_one + ) # Calculate the exponent from normalization + mantissa = (integer_bin + frac_bin)[ + first_one + 1 : first_one + 24 + ] # Extract mantissa bits + else: # Special case for zero-like numbers + exponent = 0 + mantissa = "0" * 23 # Mantissa is all zeros + + # Step 4: Encode the exponent (add bias of 127) + exponent += 127 # Apply the bias to the exponent + exponent_bin = ( + bin(exponent).replace("0b", "").zfill(8) + ) # Convert to 8-bit binary + + # Step 5: Pad the mantissa to 23 bits + mantissa = mantissa.ljust( + 23, "0" + ) # Ensure the mantissa has exactly 23 bits + + # Combine the components into a 32-bit IEEE 754 representation + ieee754 = f"{sign}{exponent_bin}{mantissa}" + return Token(TT.LITERAL, val=ieee754, prim_ty=PrimitiveTypes.FLOAT) + except ValueError: + raise Exception( + f'Something went wrong handling decimal: "{ident}"? check how many dots...' + ) + # TODO: TEMPORARY!! + elif ident.isdigit(): + return Token(TT.LITERAL, val=int(ident), prim_ty=PrimitiveTypes.INT) + return Token(TT.IDENT, val=ident) + else: + return Token(TT(c)) + + +class Expr(ABC): + def __init__(self, ty: PrimitiveTypes | Self): + super().__init__() + self.ty = ty + + +# E = TypeVar("E", bound="Expr") + +# class Expr(ABC): +# def __init__(self, ty: Type[E]) -> None: +# super().__init__() +# self.ty = ty + +# @abstractmethod +# def __repr__(self) -> str: +# pass + + +# class Type(Expr): +# def __init__(self, inner: PrimitiveTypes | Expr): +# super().__init__(inner) +# self.inner = inner + + +def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None: + if ( + possible_op is None + or possible_op.ty is None + or possible_op.ty not in [TT.PLUS, TT.DASH] + ): + return None + op = operators[possible_op.ty.value] + return (possible_op.ty.value, op) + + +def check_is_allowed(expr: Expr | None) -> bool: + allowed = expr is not None or ( + isinstance(expr, Parenthesized) + or isinstance(expr, Reference) + or isinstance(expr, Literal) + ) + if expr is not None and isinstance(expr, Identifier) and expr.for_assignment: + allowed = False + return allowed + + +class Symbol: + def __init__(self, name: str, val: Expr, belongs_to: int, id: int) -> None: + super().__init__() + self.name = name + self.val = val + self.belongs_to = belongs_to + self.id = id + + def __repr__(self) -> str: + return ( + bcolors.WARNING + + f'Symbol "{self.name}", value = {self.val}. Belongs to = {self.belongs_to}. ID = {self.id}' + + bcolors.ENDC + ) + + +class SymbolTable: + def __init__(self, id: int, parent: int | None = None) -> None: + self.symbols: dict[int, Symbol] = {} + self.name_to_id: dict[str, int] = {} + self.last_id = 0 + self.id = id + self.parent = parent + + def lookup(self, name: str) -> Symbol | None: + if name not in self.name_to_id: + return None + id = self.name_to_id[name] + return self.lookup_by_id(id) + + def lookup_by_id(self, id: int) -> Symbol | None: + if id not in self.symbols: + return None + return self.symbols[id] + + def insert(self, name: str, val: Expr) -> None: + self.last_id += 1 + symbol = Symbol(name, val, self.id, self.last_id) + self.symbols[self.last_id] = symbol + self.name_to_id[name] = self.last_id + + def __repr__(self) -> str: + return f"{self.symbols}" + + +class LambdaDefinition(Expr): + def __init__( + self, parameters: SymbolTable | list[Expr] + ): # TODO: accept symboltable or list[Expr] + super().__init__(self) + lambda_parameter_types = [] + if isinstance(parameters, SymbolTable): + for i in parameters.symbols: + symbol = parameters.symbols[i] + if symbol is None: + raise Exception("How did a none value sneak in?") + lambda_parameter_types.append(symbol.val.ty) + else: + lambda_parameter_types = parameters + self.parameters = lambda_parameter_types + + +class Lambda(Expr): + def __init__(self, parameters: SymbolTable, body: Expr): + lambda_def = LambdaDefinition(parameters) + super().__init__(lambda_def) + self.definition = lambda_def + self.body = body + + +class Parenthesized(Expr): + def __init__(self, inner: Expr = None, ty: Expr = None) -> None: + super().__init__(ty) + self.inner = inner + + def __repr__(self) -> str: + return f"Parenthesized({self.inner})" + + +class ShuntingYardAlgorithmResults(Expr): + def __init__(self, operators: list[str], results: list[Expr]) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.oeprators = operators + self.results = results + + def __repr__(self) -> str: + return f"ShuntingYardAlgorithmResults({self.results}, ops={self.oeprators})" + +class Application(Expr): + def __init__(self): + super().__init__(PrimitiveTypes.UNIT) + +class Identifier(Expr): + def __init__(self, value: str, for_assignment: bool = False) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.value = value + self.for_assignment = for_assignment + + def __repr__(self) -> str: + return f"Ident({self.value})" + + +class Tuple(Expr): + def __init__(self, values: list[Expr]) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.values = values + + def __repr__(self) -> str: + return f"Tuple({self.values})" + + +class Parameter(Expr): + def __repr__(self) -> str: + return f"Parameter" + + +class Lambda(Expr): + def __init__(self, parameters: SymbolTable, body: Expr) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.parameters = parameters + self.body = body + + def __repr__(self) -> str: + return f"Lambda(P={self.parameters},B={self.body})" + + +class Assignment(Expr): + def __init__(self, left: Expr, right: Expr) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.left = left + self.right = right + + def __repr__(self) -> str: + return f"Assignment ({self.left}) -> ({self.right})" + + +class Reference(Expr): + def __init__( + self, + name: str, + belongs_to: int, + symbol_id: int, + copy_val: bool = False, + ) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.name = name + self.symbol_id = symbol_id + self.belongs_to = belongs_to + self.copy_val = copy_val + + def __repr__(self) -> str: + return f"Ref(ST={self.belongs_to}, Ref={self.name}, ID={self.symbol_id})" + + +class Parameter(Expr): + def __init__(self) -> None: + super().__init__(PrimitiveTypes.UNIT) + + def __repr__(self) -> str: + return f"Parameter" + + +class Literal(Expr): + def __init__(self, literal_ty: Expr, val: any) -> None: + super().__init__(literal_ty) + self.val = val + + def __repr__(self) -> str: + return f"Literal(LTY={self.ty}, V={self.val})" + + +class PrimitiveType(Expr): + def __init__(self, inner: PrimitiveTypes) -> None: + super().__init__(inner) + self.inner = inner + + def __repr__(self) -> str: + return f"PrimitiveType(I={self.inner})" + + +class DataVariantWithInnerValue(Expr): + def __init__(self, name: Expr, inner_value: Expr) -> None: + super().__init__(ty=inner_value) + self.inner_value = inner_value + self.name = name + + def __repr__(self) -> str: + return f"DataVariantWithInnerValue(NAME={self.name}, IV={self.inner_value})" + + +""" +d'Custom_data_type :: int +d'Custom_data_type :: str +d'Custom_data_type :: float +d'Custom_data_type :: () +d'Custom_data_type :: OneVariant +d'Custom_data_type :: OneVariant | TwoVariant +d'Custom_data_type :: VariantWithData(int) +d'Option :: Some(int) | None +""" + + +class CustomDataType(Expr): + def __init__(self, name: Expr, dt: list[Expr]) -> None: + super().__init__(ty=dt) + self.dt = dt + self.name = name + + def __repr__(self) -> str: + return f"CustomDataType(_)" + + +class Parser(Cursor): + def __init__(self, input: list[Token]) -> None: + super().__init__(input) + self.results: list[Expr] = [] + global_symbols = SymbolTable(0) + # TODO: we are waiting for typedef! + global_symbols.insert("int", PrimitiveType(PrimitiveTypes.INT)) + global_symbols.insert("unit", PrimitiveType(PrimitiveTypes.UNIT)) + global_symbols.insert("str", PrimitiveType(PrimitiveTypes.STR)) + global_symbols.insert("float", PrimitiveType(PrimitiveTypes.FLOAT)) + + self.symbol_tables: dict[int, SymbolTable] = {0: global_symbols} + self.using_st: int = 0 + self.parsing_lambda_parameters = False + self.op_stack: list[str] = [] + # temp solution + self.current_number_of_advances = 0 + self.already_parsing_sya = False + + def resolve_type(self, ty: Expr) -> None: + pass + + def advance(self) -> None: + self.current_number_of_advances += 1 + return super().advance() + + def peek(self, amt: int = 1) -> Token | None: + if self.at + amt > len(self.input): + return None + return self.input[self.at + amt] + + def lookup(self, name: str, symbol_table_id: int | None = None) -> Symbol | None: + symbol_table_id = self.using_st if symbol_table_id is None else symbol_table_id + if symbol_table_id is None or symbol_table_id not in self.symbol_tables: + return None + symbol_table = self.symbol_tables[symbol_table_id] + symbol = symbol_table.lookup(name) + if symbol is None and (symbol_table.parent is not None): + return self.lookup(name, symbol_table.parent) + return symbol + + def parse_all(self) -> None: + while c := self.current(): + if c == None: + break + parsed = self.parse() + if parsed is None: + break + self.results.append(parsed) + self.current_number_of_advances = 0 + + def parse(self) -> Expr | None: + c = self.current() + result: Expr | None = None + if c is None: + result = None + elif c.ty == TT.PRIME_FORM: + self.advance() + if (next := self.current()) and next.ty != TT.IDENT: + raise Exception( + f"Expected double colon after the prime form...got {next}" + ) + ident = self.parse() + self.advance() + parts: list[Expr] = [] + while True: + c = self.current() + if c is None: + break + elif c.ty == TT.PIPE: + self.advance() + continue + part = self.parse() + if ( + not isinstance(part, Tuple) + and not isinstance(part, Reference) + and not isinstance(part, PrimitiveType) + and not isinstance(part, DataVariantWithInnerValue) + and ( + not isinstance(part, Identifier) + or (isinstance(part, Identifier) and part.for_assignment) + ) + ): + self.at -= 1 + break + + parts.append(part) + + result = CustomDataType(ident, parts) + elif c.ty == TT.LITERAL: + if c.prim_ty is None or c.val is None: + raise Exception("Invalid primitive type...how?") + self.advance() + result = Literal(PrimitiveType(c.prim_ty), c.val) + elif c.ty == TT.IDENT: + if c.val is None: + raise Exception("Identifier with no value?") + symbol = self.lookup(c.val) + if symbol is not None: + self.advance() + result = Reference(c.val, symbol.belongs_to, symbol.id) + else: + next = self.input[self.at + 1] + if ( + next is not None + and next.ty is TT.IDENT + and self.parsing_lambda_parameters + and self.using_st in self.symbol_tables + ): + self.advance() + sym_table = self.symbol_tables[self.using_st] + expr = self.parse() + if expr is not None and isinstance(expr, Reference): + sym_table.insert(c.val, expr) + result = Parameter() + elif next.ty is TT.OPEN_PAREN: + self.advance() + paren = self.parse() + # raise Exception(f"Enum value(?): {c.val} -> {paren}") + result = DataVariantWithInnerValue(Identifier(c.val), paren) + else: + self.advance() + for_assignment = False + if ( + (c2 := self.current()) + and c2 is not None + and c2.ty == TT.BACKSLASH + ): + for_assignment = True + result = Identifier(c.val, for_assignment) + + elif c.ty == TT.OPEN_PAREN: + self.advance() + the_between: list[Expr] = [] + has_comma: bool = False + while True: + c = self.current() + if c is not None: + if c.ty == TT.CLOSE_PAREN: + self.advance() + break + elif c.ty == TT.COMMA: + self.advance() + has_comma = True + continue + + expr = self.parse() + if expr is None: + self.at -= 1 + break + the_between.append(expr) + if len(the_between) == 0: + # We init Parenthesized with no expression so + # that it is treated as an empty tuple, non value + # or dead value. Its just a placeholder ig? + result = Parenthesized(ty=self.lookup("unit", 0)) + elif len(the_between) == 1: + # Init Parenthesized with an expression (the_between[0]) + # to do exactly what it says... for example (\ :: int ...) + result = Parenthesized(the_between[0]) + elif len(the_between) > 1 and has_comma: + # Handle tuples + result = Tuple(the_between) + + # TODO: handle all function call arg parsing + # function calls can use () but are not required + # all this should be handled down where the + # infix operators are but check if the previous + # expression was a reference, or fn def and + # then match arguments with arguments. + elif c.ty == TT.BACKSLASH: + self.advance() + symbol_table_id = list(self.symbol_tables.items())[-1][0] + 1 + lambda_symbol_table = SymbolTable(symbol_table_id, 0) + self.symbol_tables[symbol_table_id] = lambda_symbol_table + self.using_st = symbol_table_id + self.parsing_lambda_parameters = True + while True: + c = self.current() + if c is not None: + if c.ty == TT.COMMA: + self.advance() + continue + elif c.ty == TT.DOUBLE_COLON: + self.advance() + ret_type = self.parse() + if ret_type is None: + raise Exception( + f"Return type was not there or non identifier ({ret_type})" + ) + lambda_symbol_table.insert("ret", ret_type) + self.symbol_tables[symbol_table_id] = lambda_symbol_table + c = self.current() + if c is None or c.ty is not TT.FUNCTION_ARROW: + raise Exception( + f"Expected f.n. arrow after ret type... ({c})" + ) + self.advance() + break + expr = self.parse() + if expr is None or not isinstance(expr, Parameter): + self.at -= 1 + break + body = self.parse() + if body is None: + raise Exception(f"Lambda must have body {self.current()}") + popped = self.results.pop() + if not isinstance(popped, Identifier): + return popped + _lambda = Lambda(lambda_symbol_table, body) + self.using_st = 0 + self.symbol_tables[self.using_st].insert(popped.value, _lambda) + result = Assignment( + popped, + _lambda, + ) + + # At this point, past previous parsing, we should have advanced past + # the last token and now be face-to-face with the rare, elusive, OP! + c = self.current() + + if ( + isinstance(result, Reference) + and len(self.symbol_tables) > result.belongs_to >= 0 + # and c is not None + ): + print("!") + st = self.symbol_tables[result.belongs_to] + symbol = st.lookup_by_id(result.symbol_id) + if symbol is None: + raise Exception(f"Unkown symbol reference: {result}") + if isinstance(symbol.val, Lambda): + parameters = symbol.val.parameters + p_len = len(parameters.symbols.keys()) + print(f"!! {p_len}") + + # if its 1 then it HAS to be the return type...right? + # we can pass on doing anything. Leave the reference + # as is as no further handling is needed. However, + # we will flip the "copy_val" on the reference so later + # steps can quickly handle it + if p_len == 1: + result.copy_val = True + return result + elif p_len > 1: + # NOW we have more arguments so we will want to parse more. + self.current_number_of_advances = 0 + possible_args: list[Expr] = [] + + for k, ref in parameters.symbols.items(): + if ref.name == "ret": + continue + elif not isinstance(ref.val, Reference): + break + type_symbol = self.lookup(ref.val.name, ref.val.belongs_to) + if type_symbol is None or not isinstance( + type_symbol.val, PrimitiveType + ): + break + possible_arg = self.parse() + if isinstance( + possible_arg.ty.val, PrimitiveType + ) and isinstance(type_symbol.val, PrimitiveType): + if possible_arg.ty.val != type_symbol.val: + raise Exception( + f"{bcolors.FAIL}{bcolors.BOLD}Type mismatch{bcolors.ENDC}" + ) + elif ( + possible_arg is None + or possible_arg.ty is None + or not isinstance(possible_arg.ty, Symbol) + ): + raise Exception( + f"Null type? 1. {possible_arg is None} 2. {possible_arg.ty is None} 3. {not isinstance(possible_arg.ty, Symbol)} {bcolors.OKCYAN}{bcolors.BOLD}({possible_arg.ty}){bcolors.ENDC}" + ) + possible_args.append(possible_arg) + if len(possible_args) == 0: + print("no possible args") + self.at = self.at - self.current_number_of_advances + self.current_number_of_advances = 0 + return result + + print(f"possible args: {possible_args}") + possible_op = get_op(c) + if not check_is_allowed(result) or result is None or self.already_parsing_sya: + return result + if possible_op is None: + return result + + self.advance() + # NOW WE START! begin working with the shunting yard algorithm + # for parsing arithmetic + self.op_stack.append(possible_op[0]) + self.already_parsing_sya = True + res: list[Expr] = [result] + while True: + c = self.current() + possible_op = get_op(c) + if c is None: + break + elif possible_op is not None: + self.op_stack.append(possible_op[0]) + self.advance() + continue + + parsed = self.parse() + if not check_is_allowed(parsed) or parsed is None: + self.at -= 1 + break + res.append(parsed) + + sya_res = ShuntingYardAlgorithmResults(self.op_stack, res) + self.op_stack = [] + self.already_parsing_sya = False + return sya_res # type: ignore diff --git a/boot.dal b/boot.dal index ed480c7..abbdce1 100644 --- a/boot.dal +++ b/boot.dal @@ -1,7 +1,7 @@ // return_float\ x int :: float → 2.32 -return_int\ x int :: int → 2 +take_int_and_add_2\ x int :: int → x + 2 // Who is whatever and why are we doing them...? // do_whatever\ :: (float, int) → (3.32, 54) -main\ :: int → return_int(2) \ No newline at end of file +main\ :: int → take_int_and_add_2 2 \ No newline at end of file diff --git a/boot.py b/boot.py index 6a5412f..dfa62b6 100644 --- a/boot.py +++ b/boot.py @@ -1,6 +1,6 @@ -from ast import Parser, Lexer # type: ignore +from ast_1 import Parser, Lexer # type: ignore -from asm import ASM # type: ignore +#from asm import ASM # type: ignore def run(): @@ -11,9 +11,9 @@ def run(): parser = Parser(lexer.results) parser.parse_all() print(f"{parser.results}\n\n") - code_generator = ASM(parser.results) - code_generator.generate() - print(f"{code_generator.lines}") + # code_generator = ASM(parser.results) + # code_generator.generate() + # print(f"{code_generator.lines}") if __name__ == "__main__": diff --git a/types.py b/types.py new file mode 100644 index 0000000..5b46e35 --- /dev/null +++ b/types.py @@ -0,0 +1,22 @@ +from ast_1 import Assignment, Expr, Lambda +from enum import Enum, auto + + +class PrimitiveTypes(Enum): + INT = auto() + STR = auto() + FLOAT = auto() + # () + UNIT = auto() + + +class Type(Expr): + def __init__(self, inner: PrimitiveTypes | Expr): + super().__init__(inner) + self.inner = inner + self.is_lambda = isinstance(inner, Lambda) + if isinstance(inner, Assignment): + right = inner.right + if isinstance(right, Lambda): + self.inner = right +# type: ignore diff --git a/whatever b/whatever deleted file mode 100755 index 6da6517087f0f6aca8bf6bbf631bd26bfa03dc70..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 16904 zcmeI4OGs2<6vw}LrP*i_nVJR@l$nT11qm%2rO8LWP^+n+%S`v$430CQS#9J)Wh*pD-`9Khf7kE$J-RYte$tr z&k0>SFilRikx5ci>5gh+O~>bpm1vLIs&64}Tiz~&UBt{%k~+d&!LTd^!_5&a{9fNx zJ)c)UMB1puoW~e1^Qx|_-S633X}r~~Lq=3hkBNFRVqoUDONbbK3*u0Wuv6E?&_&R6 zECYTL+KxDmNc+~n>)~Xy$8g&7FP4HsE;xQ8qOhaAtuPey7s@>$8Ja8$PIY`f^UiT& zx;Xz%#zNj>F|%q1=3H>?P01J~t7G~WwuGbhJVf<NfC(@GCcp%k025#WOn?b60Vco% zm;e)C0!)AjFaajO1egF5U;<2l2`~XBzyz286JP>NfC(^x|CK;&u1Lh@(1QH5LJ5T; zu0xTwki0pmJQ9(OG0;$pa<-1h9H}RodZyio6LQ|H>E*SIr;OW#$OLux&pqa(g_M< zHMd3{J?u;hmkgH;-m{gyl|N*+e5u*uewk%++^?Fv-ZysB8+owMJY|1AJKJKd^E!U^ zPV}DYe=#{waW;SSZ|g(pjkUfjUp-aVhDR21cV|AkaCm0$;Pip?oP3ikv{BLRF{%sc Date: Sat, 14 Dec 2024 13:42:40 -0500 Subject: [PATCH 04/15] problem found for types, fixing see comments --- .gitignore | 3 ++- ast_1.py | 23 +++++++++++------------ 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index fbd179f..0349727 100644 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,5 @@ Main.o Main *.o *.hi -__pycache__ \ No newline at end of file +__pycache__ +whatever \ No newline at end of file diff --git a/ast_1.py b/ast_1.py index 8fe74e5..bed970d 100644 --- a/ast_1.py +++ b/ast_1.py @@ -235,7 +235,6 @@ def __init__(self, ty: PrimitiveTypes | Self): super().__init__() self.ty = ty - # E = TypeVar("E", bound="Expr") # class Expr(ABC): @@ -364,10 +363,12 @@ def __init__(self, operators: list[str], results: list[Expr]) -> None: def __repr__(self) -> str: return f"ShuntingYardAlgorithmResults({self.results}, ops={self.oeprators})" + class Application(Expr): def __init__(self): super().__init__(PrimitiveTypes.UNIT) + class Identifier(Expr): def __init__(self, value: str, for_assignment: bool = False) -> None: super().__init__(PrimitiveTypes.UNIT) @@ -439,7 +440,7 @@ def __repr__(self) -> str: class Literal(Expr): - def __init__(self, literal_ty: Expr, val: any) -> None: + def __init__(self, literal_ty: PrimitiveTypes, val: any) -> None: super().__init__(literal_ty) self.val = val @@ -450,10 +451,10 @@ def __repr__(self) -> str: class PrimitiveType(Expr): def __init__(self, inner: PrimitiveTypes) -> None: super().__init__(inner) - self.inner = inner + self.val = inner def __repr__(self) -> str: - return f"PrimitiveType(I={self.inner})" + return f"PrimitiveType(I={self.val})" class DataVariantWithInnerValue(Expr): @@ -748,17 +749,15 @@ def parse(self) -> Expr | None: ): break possible_arg = self.parse() - if isinstance( - possible_arg.ty.val, PrimitiveType - ) and isinstance(type_symbol.val, PrimitiveType): - if possible_arg.ty.val != type_symbol.val: - raise Exception( - f"{bcolors.FAIL}{bcolors.BOLD}Type mismatch{bcolors.ENDC}" - ) + print(f"{possible_arg.ty.ty} == {type_symbol.val.ty}") + if possible_arg.ty.ty != type_symbol.val.ty: + raise Exception( + f"{bcolors.FAIL}{bcolors.BOLD}Type mismatch{bcolors.ENDC}" + ) elif ( possible_arg is None or possible_arg.ty is None - or not isinstance(possible_arg.ty, Symbol) + # or not isinstance(possible_arg.ty, Symbol) ): raise Exception( f"Null type? 1. {possible_arg is None} 2. {possible_arg.ty is None} 3. {not isinstance(possible_arg.ty, Symbol)} {bcolors.OKCYAN}{bcolors.BOLD}({possible_arg.ty}){bcolors.ENDC}" From 37796f6304a6a12b0ed11f24eb48a7da3faf5e66 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 14 Dec 2024 16:19:35 -0500 Subject: [PATCH 05/15] fn application --- ast_1.py | 24 +++++++++++++++--------- boot.dal | 4 ---- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/ast_1.py b/ast_1.py index bed970d..fe7feb7 100644 --- a/ast_1.py +++ b/ast_1.py @@ -235,6 +235,7 @@ def __init__(self, ty: PrimitiveTypes | Self): super().__init__() self.ty = ty + # E = TypeVar("E", bound="Expr") # class Expr(ABC): @@ -364,11 +365,6 @@ def __repr__(self) -> str: return f"ShuntingYardAlgorithmResults({self.results}, ops={self.oeprators})" -class Application(Expr): - def __init__(self): - super().__init__(PrimitiveTypes.UNIT) - - class Identifier(Expr): def __init__(self, value: str, for_assignment: bool = False) -> None: super().__init__(PrimitiveTypes.UNIT) @@ -489,6 +485,16 @@ def __repr__(self) -> str: return f"CustomDataType(_)" +class Application(Expr): + def __init__(self, lambda_ref: Reference, parameters: list[Expr]): + super().__init__(PrimitiveTypes.UNIT) + self.lambda_ref = lambda_ref + self.parameters = parameters + + def __repr__(self) -> str: + return f"Application(Ref={self.lambda_ref}, P={self.parameters})" + + class Parser(Cursor): def __init__(self, input: list[Token]) -> None: super().__init__(input) @@ -715,7 +721,6 @@ def parse(self) -> Expr | None: and len(self.symbol_tables) > result.belongs_to >= 0 # and c is not None ): - print("!") st = self.symbol_tables[result.belongs_to] symbol = st.lookup_by_id(result.symbol_id) if symbol is None: @@ -723,7 +728,6 @@ def parse(self) -> Expr | None: if isinstance(symbol.val, Lambda): parameters = symbol.val.parameters p_len = len(parameters.symbols.keys()) - print(f"!! {p_len}") # if its 1 then it HAS to be the return type...right? # we can pass on doing anything. Leave the reference @@ -749,7 +753,6 @@ def parse(self) -> Expr | None: ): break possible_arg = self.parse() - print(f"{possible_arg.ty.ty} == {type_symbol.val.ty}") if possible_arg.ty.ty != type_symbol.val.ty: raise Exception( f"{bcolors.FAIL}{bcolors.BOLD}Type mismatch{bcolors.ENDC}" @@ -769,7 +772,10 @@ def parse(self) -> Expr | None: self.current_number_of_advances = 0 return result - print(f"possible args: {possible_args}") + return Application( + Reference(symbol.name, symbol.belongs_to, symbol.id, False), + possible_args, + ) possible_op = get_op(c) if not check_is_allowed(result) or result is None or self.already_parsing_sya: return result diff --git a/boot.dal b/boot.dal index abbdce1..45daa56 100644 --- a/boot.dal +++ b/boot.dal @@ -1,7 +1,3 @@ -// return_float\ x int :: float → 2.32 take_int_and_add_2\ x int :: int → x + 2 -// Who is whatever and why are we doing them...? - -// do_whatever\ :: (float, int) → (3.32, 54) main\ :: int → take_int_and_add_2 2 \ No newline at end of file From e90688fc8ee13db3a8f5bda49bd839eaf87438a6 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 14 Dec 2024 21:46:40 -0500 Subject: [PATCH 06/15] large refactoring for mypy... --- ast_1.py | 458 +++++++++------------------------------------------ ast_exprs.py | 207 +++++++++++++++++++++++ common.py | 56 +++++++ 3 files changed, 343 insertions(+), 378 deletions(-) create mode 100644 ast_exprs.py create mode 100644 common.py diff --git a/ast_1.py b/ast_1.py index fe7feb7..330e75d 100644 --- a/ast_1.py +++ b/ast_1.py @@ -1,53 +1,18 @@ from abc import ABC, abstractmethod +from ast import Expr, Tuple from os import read import re from enum import Enum, auto -from typing import Generic, Callable, Self, Type, TypeVar - - -class PrimitiveTypes(Enum): - INT = auto() - STR = auto() - FLOAT = auto() - UNIT = auto() - - -class TT(Enum): - COLON = ":" - COMMA = "," - BACKSLASH = "\\" - FUNCTION_ARROW = "→" - PLUS = "+" - # minus! - DASH = "-" - DOUBLE_COLON = "::" - OPEN_PAREN = "(" - CLOSE_PAREN = ")" - OPEN_SQUARE = "[" - PIPE = "|" - CLOSE_SQUARE = "]" - IDENT = "IDENT" - LITERAL = "LITERAL" - COMMENT = "COMMENT" - PRIME_FORM = "PRIME_FORM" - - -operators = { - "+": { - "precedence": 1, - # 0 = Left, 1 = Right, 2 = None - "associativity": 0, - }, - "-": { - "precedence": 1, - # 0 = Left, 1 = Right, 2 = None - "associativity": 0, - }, -} +from typing import Generic, Callable, Self, Type, TypeVar, Union + +from common import TT, PrimitiveTypes, bcolors, operators +from ast_exprs import AstirExpr, ShuntingYardAlgorithmResults, Identifier, Literal, PrimitiveType, Reference, AstirTuple, SymbolTable, Parameter, Symbol, Parenthesized, Lambda, Assignment, Application, check_is_allowed class Token: - def __init__(self, ty: TT | None, prim_ty: PrimitiveTypes = None, val=None) -> None: + def __init__( + self, ty: TT | None, prim_ty: PrimitiveTypes | None = None, val=None + ) -> None: if ty == None: raise Exception("Token type was none...") self.ty = ty @@ -57,6 +22,15 @@ def __init__(self, ty: TT | None, prim_ty: PrimitiveTypes = None, val=None) -> N def __repr__(self) -> str: return f"{self.ty} ({self.val})" +def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None: + if ( + possible_op is None + or possible_op.ty is None + or possible_op.ty not in [TT.PLUS, TT.DASH] + ): + return None + op = operators[possible_op.ty.value] + return (possible_op.ty.value, op) def is_valid_ident(c: str) -> bool: return c.isalnum() or c == "_" @@ -65,20 +39,6 @@ def is_valid_ident(c: str) -> bool: T = TypeVar("T") -# https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal -# simple colors for now...just need help to see my eyes suck -class bcolors: - HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKCYAN = "\033[96m" - OKGREEN = "\033[92m" - WARNING = "\033[93m" - FAIL = "\033[91m" - ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" - - class Cursor(ABC, Generic[T]): def __init__(self, input: list[T]) -> None: super().__init__() @@ -119,10 +79,10 @@ def collect_until( devance_b4_break: bool = False, start_str: str = "", ) -> str: - temp_str = start_str + temp_str: str = start_str while True: c = self.current() - if check(c, temp_str): + if c is None or check(c, temp_str): if devance_b4_break: self.at -= 1 break @@ -171,7 +131,9 @@ def identifier_check(c: str | None, rest: str) -> bool: bin(integer).replace("0b", "") if integer != 0 else "0" ) - frac_bin = [] # List to store the fractional binary digits + frac_bin: list[str] = ( + [] + ) # List to store the fractional binary digits while ( fractional and len(frac_bin) < 23 + 3 ): # Stop after 23+3 bits to avoid overflow @@ -181,8 +143,8 @@ def identifier_check(c: str | None, rest: str) -> bool: fractional -= ( bit # Remove the integer part from the fractional value ) - frac_bin = "".join(frac_bin) - combined_bin = integer_bin + "." + frac_bin + frac_bin2: str = "".join(frac_bin) + combined_bin = integer_bin + "." + frac_bin2 if ( "1" in combined_bin @@ -197,7 +159,7 @@ def identifier_check(c: str | None, rest: str) -> bool: exponent = ( len(integer_bin) - 1 - first_one ) # Calculate the exponent from normalization - mantissa = (integer_bin + frac_bin)[ + mantissa = (integer_bin + frac_bin2)[ first_one + 1 : first_one + 24 ] # Extract mantissa bits else: # Special case for zero-like numbers @@ -228,277 +190,13 @@ def identifier_check(c: str | None, rest: str) -> bool: return Token(TT.IDENT, val=ident) else: return Token(TT(c)) - - -class Expr(ABC): - def __init__(self, ty: PrimitiveTypes | Self): - super().__init__() - self.ty = ty - - -# E = TypeVar("E", bound="Expr") - -# class Expr(ABC): -# def __init__(self, ty: Type[E]) -> None: -# super().__init__() -# self.ty = ty - -# @abstractmethod -# def __repr__(self) -> str: -# pass - - -# class Type(Expr): -# def __init__(self, inner: PrimitiveTypes | Expr): -# super().__init__(inner) -# self.inner = inner - - -def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None: - if ( - possible_op is None - or possible_op.ty is None - or possible_op.ty not in [TT.PLUS, TT.DASH] - ): - return None - op = operators[possible_op.ty.value] - return (possible_op.ty.value, op) - - -def check_is_allowed(expr: Expr | None) -> bool: - allowed = expr is not None or ( - isinstance(expr, Parenthesized) - or isinstance(expr, Reference) - or isinstance(expr, Literal) - ) - if expr is not None and isinstance(expr, Identifier) and expr.for_assignment: - allowed = False - return allowed - - -class Symbol: - def __init__(self, name: str, val: Expr, belongs_to: int, id: int) -> None: - super().__init__() - self.name = name - self.val = val - self.belongs_to = belongs_to - self.id = id - - def __repr__(self) -> str: - return ( - bcolors.WARNING - + f'Symbol "{self.name}", value = {self.val}. Belongs to = {self.belongs_to}. ID = {self.id}' - + bcolors.ENDC - ) - - -class SymbolTable: - def __init__(self, id: int, parent: int | None = None) -> None: - self.symbols: dict[int, Symbol] = {} - self.name_to_id: dict[str, int] = {} - self.last_id = 0 - self.id = id - self.parent = parent - - def lookup(self, name: str) -> Symbol | None: - if name not in self.name_to_id: - return None - id = self.name_to_id[name] - return self.lookup_by_id(id) - - def lookup_by_id(self, id: int) -> Symbol | None: - if id not in self.symbols: - return None - return self.symbols[id] - - def insert(self, name: str, val: Expr) -> None: - self.last_id += 1 - symbol = Symbol(name, val, self.id, self.last_id) - self.symbols[self.last_id] = symbol - self.name_to_id[name] = self.last_id - - def __repr__(self) -> str: - return f"{self.symbols}" - - -class LambdaDefinition(Expr): - def __init__( - self, parameters: SymbolTable | list[Expr] - ): # TODO: accept symboltable or list[Expr] - super().__init__(self) - lambda_parameter_types = [] - if isinstance(parameters, SymbolTable): - for i in parameters.symbols: - symbol = parameters.symbols[i] - if symbol is None: - raise Exception("How did a none value sneak in?") - lambda_parameter_types.append(symbol.val.ty) - else: - lambda_parameter_types = parameters - self.parameters = lambda_parameter_types - - -class Lambda(Expr): - def __init__(self, parameters: SymbolTable, body: Expr): - lambda_def = LambdaDefinition(parameters) - super().__init__(lambda_def) - self.definition = lambda_def - self.body = body - - -class Parenthesized(Expr): - def __init__(self, inner: Expr = None, ty: Expr = None) -> None: - super().__init__(ty) - self.inner = inner - - def __repr__(self) -> str: - return f"Parenthesized({self.inner})" - - -class ShuntingYardAlgorithmResults(Expr): - def __init__(self, operators: list[str], results: list[Expr]) -> None: - super().__init__(PrimitiveTypes.UNIT) - self.oeprators = operators - self.results = results - - def __repr__(self) -> str: - return f"ShuntingYardAlgorithmResults({self.results}, ops={self.oeprators})" - - -class Identifier(Expr): - def __init__(self, value: str, for_assignment: bool = False) -> None: - super().__init__(PrimitiveTypes.UNIT) - self.value = value - self.for_assignment = for_assignment - - def __repr__(self) -> str: - return f"Ident({self.value})" - - -class Tuple(Expr): - def __init__(self, values: list[Expr]) -> None: - super().__init__(PrimitiveTypes.UNIT) - self.values = values - - def __repr__(self) -> str: - return f"Tuple({self.values})" - - -class Parameter(Expr): - def __repr__(self) -> str: - return f"Parameter" - - -class Lambda(Expr): - def __init__(self, parameters: SymbolTable, body: Expr) -> None: - super().__init__(PrimitiveTypes.UNIT) - self.parameters = parameters - self.body = body - - def __repr__(self) -> str: - return f"Lambda(P={self.parameters},B={self.body})" - - -class Assignment(Expr): - def __init__(self, left: Expr, right: Expr) -> None: - super().__init__(PrimitiveTypes.UNIT) - self.left = left - self.right = right - - def __repr__(self) -> str: - return f"Assignment ({self.left}) -> ({self.right})" - - -class Reference(Expr): - def __init__( - self, - name: str, - belongs_to: int, - symbol_id: int, - copy_val: bool = False, - ) -> None: - super().__init__(PrimitiveTypes.UNIT) - self.name = name - self.symbol_id = symbol_id - self.belongs_to = belongs_to - self.copy_val = copy_val - - def __repr__(self) -> str: - return f"Ref(ST={self.belongs_to}, Ref={self.name}, ID={self.symbol_id})" - - -class Parameter(Expr): - def __init__(self) -> None: - super().__init__(PrimitiveTypes.UNIT) - - def __repr__(self) -> str: - return f"Parameter" - - -class Literal(Expr): - def __init__(self, literal_ty: PrimitiveTypes, val: any) -> None: - super().__init__(literal_ty) - self.val = val - - def __repr__(self) -> str: - return f"Literal(LTY={self.ty}, V={self.val})" - - -class PrimitiveType(Expr): - def __init__(self, inner: PrimitiveTypes) -> None: - super().__init__(inner) - self.val = inner - - def __repr__(self) -> str: - return f"PrimitiveType(I={self.val})" - - -class DataVariantWithInnerValue(Expr): - def __init__(self, name: Expr, inner_value: Expr) -> None: - super().__init__(ty=inner_value) - self.inner_value = inner_value - self.name = name - - def __repr__(self) -> str: - return f"DataVariantWithInnerValue(NAME={self.name}, IV={self.inner_value})" - - -""" -d'Custom_data_type :: int -d'Custom_data_type :: str -d'Custom_data_type :: float -d'Custom_data_type :: () -d'Custom_data_type :: OneVariant -d'Custom_data_type :: OneVariant | TwoVariant -d'Custom_data_type :: VariantWithData(int) -d'Option :: Some(int) | None -""" - - -class CustomDataType(Expr): - def __init__(self, name: Expr, dt: list[Expr]) -> None: - super().__init__(ty=dt) - self.dt = dt - self.name = name - - def __repr__(self) -> str: - return f"CustomDataType(_)" - - -class Application(Expr): - def __init__(self, lambda_ref: Reference, parameters: list[Expr]): - super().__init__(PrimitiveTypes.UNIT) - self.lambda_ref = lambda_ref - self.parameters = parameters - - def __repr__(self) -> str: - return f"Application(Ref={self.lambda_ref}, P={self.parameters})" + return Token(None) class Parser(Cursor): def __init__(self, input: list[Token]) -> None: super().__init__(input) - self.results: list[Expr] = [] + self.results: list['AstirExpr'] = [] global_symbols = SymbolTable(0) # TODO: we are waiting for typedef! global_symbols.insert("int", PrimitiveType(PrimitiveTypes.INT)) @@ -546,49 +244,49 @@ def parse_all(self) -> None: self.results.append(parsed) self.current_number_of_advances = 0 - def parse(self) -> Expr | None: + def parse(self) -> AstirExpr | None: c = self.current() - result: Expr | None = None + result: AstirExpr | None = None if c is None: - result = None - elif c.ty == TT.PRIME_FORM: - self.advance() - if (next := self.current()) and next.ty != TT.IDENT: - raise Exception( - f"Expected double colon after the prime form...got {next}" - ) - ident = self.parse() - self.advance() - parts: list[Expr] = [] - while True: - c = self.current() - if c is None: - break - elif c.ty == TT.PIPE: - self.advance() - continue - part = self.parse() - if ( - not isinstance(part, Tuple) - and not isinstance(part, Reference) - and not isinstance(part, PrimitiveType) - and not isinstance(part, DataVariantWithInnerValue) - and ( - not isinstance(part, Identifier) - or (isinstance(part, Identifier) and part.for_assignment) - ) - ): - self.at -= 1 - break - - parts.append(part) + return None - result = CustomDataType(ident, parts) + # elif c.ty == TT.PRIME_FORM: + # self.advance() + # if (next := self.current()) and next.ty != TT.IDENT: + # raise Exception( + # f"Expected double colon after the prime form...got {next}" + # ) + # ident = self.parse() + # self.advance() + # parts: list[Expr] = [] + # while True: + # c = self.current() + # if c is None: + # break + # elif c.ty == TT.PIPE: + # self.advance() + # continue + # part = self.parse() + # if ( + # not isinstance(part, Tuple) + # and not isinstance(part, Reference) + # and not isinstance(part, PrimitiveType) + # and ( + # not isinstance(part, Identifier) + # or (isinstance(part, Identifier) and part.for_assignment) + # ) + # ): + # self.at -= 1 + # break + + # parts.append(part) + + # result = CustomDataType(ident, parts) elif c.ty == TT.LITERAL: if c.prim_ty is None or c.val is None: raise Exception("Invalid primitive type...how?") self.advance() - result = Literal(PrimitiveType(c.prim_ty), c.val) + result = Literal(c.prim_ty, c.val) elif c.ty == TT.IDENT: if c.val is None: raise Exception("Identifier with no value?") @@ -610,12 +308,12 @@ def parse(self) -> Expr | None: if expr is not None and isinstance(expr, Reference): sym_table.insert(c.val, expr) result = Parameter() - elif next.ty is TT.OPEN_PAREN: - self.advance() - paren = self.parse() - # raise Exception(f"Enum value(?): {c.val} -> {paren}") - result = DataVariantWithInnerValue(Identifier(c.val), paren) - else: + # elif next.ty is TT.OPEN_PAREN: + # self.advance() + # paren = self.parse() + # # raise Exception(f"Enum value(?): {c.val} -> {paren}") + # result = DataVariantWithInnerValue(Identifier(c.val), paren) + else: self.advance() for_assignment = False if ( @@ -628,7 +326,7 @@ def parse(self) -> Expr | None: elif c.ty == TT.OPEN_PAREN: self.advance() - the_between: list[Expr] = [] + the_between: list[AstirExpr] = [] has_comma: bool = False while True: c = self.current() @@ -650,14 +348,14 @@ def parse(self) -> Expr | None: # We init Parenthesized with no expression so # that it is treated as an empty tuple, non value # or dead value. Its just a placeholder ig? - result = Parenthesized(ty=self.lookup("unit", 0)) + result = Parenthesized(ty=PrimitiveTypes.UNIT) elif len(the_between) == 1: # Init Parenthesized with an expression (the_between[0]) # to do exactly what it says... for example (\ :: int ...) - result = Parenthesized(the_between[0]) + result = Parenthesized(PrimitiveType(PrimitiveTypes.UNIT), the_between[0]) elif len(the_between) > 1 and has_comma: # Handle tuples - result = Tuple(the_between) + result = AstirTuple(the_between) # TODO: handle all function call arg parsing # function calls can use () but are not required @@ -712,6 +410,8 @@ def parse(self) -> Expr | None: _lambda, ) + if result is None: + raise Exception("Failed to parse ANYTHING.") # At this point, past previous parsing, we should have advanced past # the last token and now be face-to-face with the rare, elusive, OP! c = self.current() @@ -726,7 +426,7 @@ def parse(self) -> Expr | None: if symbol is None: raise Exception(f"Unkown symbol reference: {result}") if isinstance(symbol.val, Lambda): - parameters = symbol.val.parameters + parameters = symbol.val.definition.parameters p_len = len(parameters.symbols.keys()) # if its 1 then it HAS to be the return type...right? @@ -740,7 +440,7 @@ def parse(self) -> Expr | None: elif p_len > 1: # NOW we have more arguments so we will want to parse more. self.current_number_of_advances = 0 - possible_args: list[Expr] = [] + possible_args: list[AstirExpr] = [] for k, ref in parameters.symbols.items(): if ref.name == "ret": @@ -753,6 +453,8 @@ def parse(self) -> Expr | None: ): break possible_arg = self.parse() + if possible_arg is None: + raise Exception("Failed to parse") if possible_arg.ty.ty != type_symbol.val.ty: raise Exception( f"{bcolors.FAIL}{bcolors.BOLD}Type mismatch{bcolors.ENDC}" @@ -787,7 +489,7 @@ def parse(self) -> Expr | None: # for parsing arithmetic self.op_stack.append(possible_op[0]) self.already_parsing_sya = True - res: list[Expr] = [result] + res: list[AstirExpr] = [result] while True: c = self.current() possible_op = get_op(c) diff --git a/ast_exprs.py b/ast_exprs.py new file mode 100644 index 0000000..8b3210b --- /dev/null +++ b/ast_exprs.py @@ -0,0 +1,207 @@ +from abc import ABC +from common import TT, PrimitiveTypes, bcolors, operators +from typing import Type, Any, Union + +class AstirExpr(ABC): + def __init__(self, ty: Union['PrimitiveTypes', 'AstirExpr']): + super().__init__() + self.ty = ty + +def check_is_allowed(AstirExpr: AstirExpr | None) -> bool: + allowed = AstirExpr is not None or ( + isinstance(AstirExpr, Parenthesized) + or isinstance(AstirExpr, Reference) + or isinstance(AstirExpr, Literal) + ) + if AstirExpr is not None and isinstance(AstirExpr, Identifier) and AstirExpr.for_assignment: + allowed = False + return allowed + + +class Symbol: + def __init__(self, name: str, val: AstirExpr, belongs_to: int, id: int) -> None: + super().__init__() + self.name = name + self.val = val + self.belongs_to = belongs_to + self.id = id + + def __repr__(self) -> str: + return ( + bcolors.WARNING + + f'Symbol "{self.name}", value = {self.val}. Belongs to = {self.belongs_to}. ID = {self.id}' + + bcolors.ENDC + ) + + +class SymbolTable: + def __init__(self, id: int, parent: int | None = None) -> None: + self.symbols: dict[int, Symbol] = {} + self.name_to_id: dict[str, int] = {} + self.last_id = 0 + self.id = id + self.parent = parent + + def lookup(self, name: str) -> Symbol | None: + if name not in self.name_to_id: + return None + id = self.name_to_id[name] + return self.lookup_by_id(id) + + def lookup_by_id(self, id: int) -> Symbol | None: + if id not in self.symbols: + return None + return self.symbols[id] + + def insert(self, name: str, val: AstirExpr) -> None: + self.last_id += 1 + symbol = Symbol(name, val, self.id, self.last_id) + self.symbols[self.last_id] = symbol + self.name_to_id[name] = self.last_id + + def __repr__(self) -> str: + return f"{self.symbols}" + + +class LambdaDefinition(AstirExpr): + def __init__( + self, parameters: SymbolTable# | list[PrimitiveTypes | AstirExpr] + ): # TODO: accept symboltable or list[AstirExpr] + super().__init__(self) + # todo: + # lambda_parameter_types: list[PrimitiveTypes | AstirExpr] = [] + # if fix_params: + # if isinstance(parameters, SymbolTable): + # for i in parameters.symbols: + # symbol = parameters.symbols[i] + # if symbol is None: + # raise Exception("How did a none value sneak in?") + # lambda_parameter_types.append(symbol.val.ty) + # else: + # lambda_parameter_types = parameters + # self.parameters = lambda_parameter_types + + self.parameters=parameters + + +class Lambda(AstirExpr): + def __init__(self, parameters: SymbolTable, body: AstirExpr): + lambda_def = LambdaDefinition(parameters) + super().__init__(lambda_def) + self.definition = lambda_def + self.body = body + + +class Parenthesized(AstirExpr): + def __init__(self, ty: AstirExpr | PrimitiveTypes, inner: AstirExpr | None = None) -> None: + super().__init__(ty) + self.inner = inner + + def __repr__(self) -> str: + return f"Parenthesized({self.inner})" + + +class ShuntingYardAlgorithmResults(AstirExpr): + def __init__(self, operators: list[str], results: list[AstirExpr]) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.oeprators = operators + self.results = results + + def __repr__(self) -> str: + return f"ShuntingYardAlgorithmResults({self.results}, ops={self.oeprators})" + + +class Identifier(AstirExpr): + def __init__(self, value: str, for_assignment: bool = False) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.value = value + self.for_assignment = for_assignment + + def __repr__(self) -> str: + return f"Ident({self.value})" + + +class AstirTuple(AstirExpr): + def __init__(self, values: list[AstirExpr]) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.values = values + + def __repr__(self) -> str: + return f"Tuple({self.values})" + + +class Parameter(AstirExpr): + def __init__(self): + super().__init__(PrimitiveTypes.UNIT) + def __repr__(self) -> str: + return f"Parameter" + +class Assignment(AstirExpr): + def __init__(self, left: AstirExpr, right: AstirExpr) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.left = left + self.right = right + + def __repr__(self) -> str: + return f"Assignment ({self.left}) -> ({self.right})" + + +class Reference(AstirExpr): + def __init__( + self, + name: str, + belongs_to: int, + symbol_id: int, + copy_val: bool = False, + ) -> None: + super().__init__(PrimitiveTypes.UNIT) + self.name = name + self.symbol_id = symbol_id + self.belongs_to = belongs_to + self.copy_val = copy_val + + def __repr__(self) -> str: + return f"Ref(ST={self.belongs_to}, Ref={self.name}, ID={self.symbol_id})" + + +class Literal(AstirExpr): + def __init__(self, literal_ty: PrimitiveTypes, val: Any) -> None: + super().__init__(literal_ty) + self.val = val + + def __repr__(self) -> str: + return f"Literal(LTY={self.ty}, V={self.val})" + + +class PrimitiveType(AstirExpr): + def __init__(self, inner: PrimitiveTypes) -> None: + super().__init__(inner) + self.val = inner + + def __repr__(self) -> str: + return f"PrimitiveType(I={self.val})" + + +""" +d'Custom_data_type :: int +d'Custom_data_type :: str +d'Custom_data_type :: float +d'Custom_data_type :: () +d'Custom_data_type :: OneVariant +d'Custom_data_type :: OneVariant | TwoVariant +d'Custom_data_type :: VariantWithData(int) +d'Option :: Some(int) | None +""" + + +class Application(AstirExpr): + def __init__(self, lambda_ref: Reference, parameters: list[AstirExpr]): + super().__init__(PrimitiveTypes.UNIT) + self.lambda_ref = lambda_ref + self.parameters = parameters + + def __repr__(self) -> str: + return f"Application(Ref={self.lambda_ref}, P={self.parameters})" + + +# type: ignore diff --git a/common.py b/common.py new file mode 100644 index 0000000..b9be42e --- /dev/null +++ b/common.py @@ -0,0 +1,56 @@ +from enum import Enum, auto + + +# https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal +# simple colors for now...just need help to see my eyes suck +class bcolors: + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + + +class PrimitiveTypes(Enum): + INT = auto() + STR = auto() + FLOAT = auto() + UNIT = auto() + + +class TT(Enum): + COLON = ":" + COMMA = "," + BACKSLASH = "\\" + FUNCTION_ARROW = "→" + PLUS = "+" + # minus! + DASH = "-" + DOUBLE_COLON = "::" + OPEN_PAREN = "(" + CLOSE_PAREN = ")" + OPEN_SQUARE = "[" + PIPE = "|" + CLOSE_SQUARE = "]" + IDENT = "IDENT" + LITERAL = "LITERAL" + COMMENT = "COMMENT" + PRIME_FORM = "PRIME_FORM" + + +operators = { + "+": { + "precedence": 1, + # 0 = Left, 1 = Right, 2 = None + "associativity": 0, + }, + "-": { + "precedence": 1, + # 0 = Left, 1 = Right, 2 = None + "associativity": 0, + }, +} From f1383796db5325baa96557d3679166d7327da507 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 14 Dec 2024 23:09:14 -0500 Subject: [PATCH 07/15] fix all mypy issues we had --- ast_1.py | 31 ++++++++++++++++++++++++++----- ast_exprs.py | 6 ++++++ 2 files changed, 32 insertions(+), 5 deletions(-) diff --git a/ast_1.py b/ast_1.py index 330e75d..63978c5 100644 --- a/ast_1.py +++ b/ast_1.py @@ -6,7 +6,23 @@ from typing import Generic, Callable, Self, Type, TypeVar, Union from common import TT, PrimitiveTypes, bcolors, operators -from ast_exprs import AstirExpr, ShuntingYardAlgorithmResults, Identifier, Literal, PrimitiveType, Reference, AstirTuple, SymbolTable, Parameter, Symbol, Parenthesized, Lambda, Assignment, Application, check_is_allowed +from ast_exprs import ( + AstirExpr, + ShuntingYardAlgorithmResults, + Identifier, + Literal, + PrimitiveType, + Reference, + AstirTuple, + SymbolTable, + Parameter, + Symbol, + Parenthesized, + Lambda, + Assignment, + Application, + check_is_allowed, +) class Token: @@ -22,6 +38,7 @@ def __init__( def __repr__(self) -> str: return f"{self.ty} ({self.val})" + def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None: if ( possible_op is None @@ -32,6 +49,7 @@ def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None: op = operators[possible_op.ty.value] return (possible_op.ty.value, op) + def is_valid_ident(c: str) -> bool: return c.isalnum() or c == "_" @@ -196,7 +214,7 @@ def identifier_check(c: str | None, rest: str) -> bool: class Parser(Cursor): def __init__(self, input: list[Token]) -> None: super().__init__(input) - self.results: list['AstirExpr'] = [] + self.results: list["AstirExpr"] = [] global_symbols = SymbolTable(0) # TODO: we are waiting for typedef! global_symbols.insert("int", PrimitiveType(PrimitiveTypes.INT)) @@ -313,7 +331,7 @@ def parse(self) -> AstirExpr | None: # paren = self.parse() # # raise Exception(f"Enum value(?): {c.val} -> {paren}") # result = DataVariantWithInnerValue(Identifier(c.val), paren) - else: + else: self.advance() for_assignment = False if ( @@ -352,7 +370,9 @@ def parse(self) -> AstirExpr | None: elif len(the_between) == 1: # Init Parenthesized with an expression (the_between[0]) # to do exactly what it says... for example (\ :: int ...) - result = Parenthesized(PrimitiveType(PrimitiveTypes.UNIT), the_between[0]) + result = Parenthesized( + PrimitiveType(PrimitiveTypes.UNIT), the_between[0] + ) elif len(the_between) > 1 and has_comma: # Handle tuples result = AstirTuple(the_between) @@ -455,7 +475,8 @@ def parse(self) -> AstirExpr | None: possible_arg = self.parse() if possible_arg is None: raise Exception("Failed to parse") - if possible_arg.ty.ty != type_symbol.val.ty: + + if possible_arg.ty != type_symbol.val.ty: raise Exception( f"{bcolors.FAIL}{bcolors.BOLD}Type mismatch{bcolors.ENDC}" ) diff --git a/ast_exprs.py b/ast_exprs.py index 8b3210b..1e3565c 100644 --- a/ast_exprs.py +++ b/ast_exprs.py @@ -6,6 +6,7 @@ class AstirExpr(ABC): def __init__(self, ty: Union['PrimitiveTypes', 'AstirExpr']): super().__init__() self.ty = ty + def check_is_allowed(AstirExpr: AstirExpr | None) -> bool: allowed = AstirExpr is not None or ( @@ -83,6 +84,9 @@ def __init__( self.parameters=parameters + def __repr__(self): + return f"LambdaDef(Parameters={self.parameters})" + class Lambda(AstirExpr): def __init__(self, parameters: SymbolTable, body: AstirExpr): @@ -91,6 +95,8 @@ def __init__(self, parameters: SymbolTable, body: AstirExpr): self.definition = lambda_def self.body = body + def __repr__(self): + return f"Lambda(Def={self.definition}, Body={self.body})" class Parenthesized(AstirExpr): def __init__(self, ty: AstirExpr | PrimitiveTypes, inner: AstirExpr | None = None) -> None: From 3e2680666520c2cd32c9e0c3e60f177b4d1124d8 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 14 Dec 2024 23:13:26 -0500 Subject: [PATCH 08/15] . --- ast.py | 760 ------------------------------------------------------- types.py | 22 -- 2 files changed, 782 deletions(-) delete mode 100644 ast.py delete mode 100644 types.py diff --git a/ast.py b/ast.py deleted file mode 100644 index d0503ae..0000000 --- a/ast.py +++ /dev/null @@ -1,760 +0,0 @@ -from abc import ABC, abstractmethod -from os import read -import re -from enum import Enum, auto -from typing import Generic, Callable, Type, TypeVar - - -class PrimitiveTypes(Enum): - INT = 0 - STR = 1 - FLOAT = 2 - # () - UNIT = 3 - - -class TT(Enum): - COLON = ":" - COMMA = "," - BACKSLASH = "\\" - FUNCTION_ARROW = "→" - PLUS = "+" - # minus! - DASH = "-" - DOUBLE_COLON = "::" - OPEN_PAREN = "(" - CLOSE_PAREN = ")" - OPEN_SQUARE = "[" - PIPE = "|" - CLOSE_SQUARE = "]" - IDENT = "IDENT" - LITERAL = "LITERAL" - COMMENT = "COMMENT" - PRIME_FORM = "PRIME_FORM" - - -operators = { - "+": { - "precedence": 1, - # 0 = Left, 1 = Right, 2 = None - "associativity": 0, - }, - "-": { - "precedence": 1, - # 0 = Left, 1 = Right, 2 = None - "associativity": 0, - }, -} - - -class Token: - def __init__(self, ty: TT | None, prim_ty: PrimitiveTypes = None, val=None) -> None: - if ty == None: - raise Exception("Token type was none...") - self.ty = ty - self.val = val - self.prim_ty = prim_ty - - def __repr__(self) -> str: - return f"{self.ty} ({self.val})" - - -def is_valid_ident(c: str) -> bool: - return c.isalnum() or c == "_" - - -T = TypeVar("T") - - -# https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal -# simple colors for now...just need help to see my eyes suck -class bcolors: - HEADER = "\033[95m" - OKBLUE = "\033[94m" - OKCYAN = "\033[96m" - OKGREEN = "\033[92m" - WARNING = "\033[93m" - FAIL = "\033[91m" - ENDC = "\033[0m" - BOLD = "\033[1m" - UNDERLINE = "\033[4m" - - -class Cursor(ABC, Generic[T]): - def __init__(self, input: list[T]) -> None: - super().__init__() - self.input = input - self.at = 0 - - def advance(self) -> None: - self.at += 1 - - def current(self) -> T | None: - if self.at >= len(self.input): - return None - return self.input[self.at] - - -class Lexer(Cursor): - def __init__(self, input: str) -> None: - super().__init__(list(input)) - self.results: list[Token] = [] - - def lex_all(self) -> None: - while c := self.current(): - if c == " " or c == "\n": - self.advance() - continue - elif c == None: - break - lexed = self.lex() - if lexed.ty == TT.COMMENT: - self.advance() - continue - self.results.append(lexed) - self.advance() - - def collect_until( - self, - check: Callable[[str | None, str], bool], - devance_b4_break: bool = False, - start_str: str = "", - ) -> str: - temp_str = start_str - while True: - c = self.current() - if check(c, temp_str): - if devance_b4_break: - self.at -= 1 - break - self.advance() - temp_str += c - return temp_str - - def lex(self) -> Token: - c = self.current() - if c is None: - raise Exception("Ran out of input") - elif c == "/": - if self.at + 1 < len(self.input) and self.input[self.at + 1] == "/": - self.at += 2 - self.collect_until(lambda a, _: a == "\n", False) - return Token(TT.COMMENT) - elif c == ":" and self.input[self.at + 1] == ":": - self.advance() - return Token(TT.DOUBLE_COLON) - elif c == '"': - self.advance() - string = self.collect_until(lambda c, _: (c is None) or c == '"') - return Token(TT.LITERAL, prim_ty=PrimitiveTypes.STR, val=string) - elif c not in TT and (is_valid_ident(c) or c == "."): - self.advance() - if (next := self.current()) and next == "'" and c == "d": - # self.advance() - return Token(TT.PRIME_FORM) - - def identifier_check(c: str | None, rest: str) -> bool: - if (c is None) or (not is_valid_ident(c)) and c != ".": - return True - return False - - ident = self.collect_until(identifier_check, True, start_str=c) - - if "." in ident: - try: - number = float(ident) - sign = 0 if number >= 0 else 1 - - number = abs(number) - integer = int(number) - fractional = number - integer - integer_bin = ( - bin(integer).replace("0b", "") if integer != 0 else "0" - ) - - frac_bin = [] # List to store the fractional binary digits - while ( - fractional and len(frac_bin) < 23 + 3 - ): # Stop after 23+3 bits to avoid overflow - fractional *= 2 # Multiply by 2 to shift digits left - bit = int(fractional) # Extract the integer part (0 or 1) - frac_bin.append(str(bit)) # Append the bit to the list - fractional -= ( - bit # Remove the integer part from the fractional value - ) - frac_bin = "".join(frac_bin) - combined_bin = integer_bin + "." + frac_bin - - if ( - "1" in combined_bin - ): # Ensure there is at least one significant bit - first_one = combined_bin.index( - "1" - ) # Find the position of the first '1' - if "." in combined_bin and first_one > combined_bin.index("."): - first_one -= ( - 1 # Adjust for the position of the binary point - ) - exponent = ( - len(integer_bin) - 1 - first_one - ) # Calculate the exponent from normalization - mantissa = (integer_bin + frac_bin)[ - first_one + 1 : first_one + 24 - ] # Extract mantissa bits - else: # Special case for zero-like numbers - exponent = 0 - mantissa = "0" * 23 # Mantissa is all zeros - - # Step 4: Encode the exponent (add bias of 127) - exponent += 127 # Apply the bias to the exponent - exponent_bin = ( - bin(exponent).replace("0b", "").zfill(8) - ) # Convert to 8-bit binary - - # Step 5: Pad the mantissa to 23 bits - mantissa = mantissa.ljust( - 23, "0" - ) # Ensure the mantissa has exactly 23 bits - - # Combine the components into a 32-bit IEEE 754 representation - ieee754 = f"{sign}{exponent_bin}{mantissa}" - return Token(TT.LITERAL, val=ieee754, prim_ty=PrimitiveTypes.FLOAT) - except ValueError: - raise Exception( - f'Something went wrong handling decimal: "{ident}"? check how many dots...' - ) - # TODO: TEMPORARY!! - elif ident.isdigit(): - return Token(TT.LITERAL, val=int(ident), prim_ty=PrimitiveTypes.INT) - return Token(TT.IDENT, val=ident) - else: - return Token(TT(c)) - - -E = TypeVar("E", bound="Expr") - - -class Expr(ABC): - # Not all Exprs have types! - def __init__(self, ty: Type[E] = None) -> None: - super().__init__() - self.ty = ty - - @abstractmethod - def __repr__(self) -> str: - pass - - -def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None: - if ( - possible_op is None - or possible_op.ty is None - or possible_op.ty not in [TT.PLUS, TT.DASH] - ): - return None - op = operators[possible_op.ty.value] - return (possible_op.ty.value, op) - - -def check_is_allowed(expr: Expr | None) -> bool: - allowed = expr is not None or ( - isinstance(expr, Parenthesized) - or isinstance(expr, Reference) - or isinstance(expr, Literal) - ) - if expr is not None and isinstance(expr, Identifier) and expr.for_assignment: - allowed = False - return allowed - - -class Symbol: - def __init__(self, name: str, val: Expr, belongs_to: int, id: int) -> None: - super().__init__() - self.name = name - self.val = val - self.belongs_to = belongs_to - self.id = id - - def __repr__(self) -> str: - return ( - bcolors.WARNING - + f'Symbol "{self.name}", value = {self.val}. Belongs to = {self.belongs_to}. ID = {self.id}' - + bcolors.ENDC - ) - - -class SymbolTable: - def __init__(self, id: int, parent: int | None = None) -> None: - self.symbols: dict[int, Symbol] = {} - self.name_to_id: dict[str, int] = {} - self.last_id = 0 - self.id = id - self.parent = parent - - def lookup(self, name: str) -> Symbol | None: - if name not in self.name_to_id: - return None - id = self.name_to_id[name] - return self.lookup_by_id(id) - - def lookup_by_id(self, id: int) -> Symbol | None: - if id not in self.symbols: - return None - return self.symbols[id] - - def insert(self, name: str, val: Expr) -> None: - self.last_id += 1 - symbol = Symbol(name, val, self.id, self.last_id) - self.symbols[self.last_id] = symbol - self.name_to_id[name] = self.last_id - - def __repr__(self) -> str: - return f"{self.symbols}" - - -class Parenthesized(Expr): - def __init__(self, inner: Expr = None, ty: Expr = None) -> None: - super().__init__(ty) - self.inner = inner - - def __repr__(self) -> str: - return f"Parenthesized({self.inner})" - - -class ShuntingYardAlgorithmResults(Expr): - def __init__(self, operators: list[str], results: list[Expr]) -> None: - super().__init__() - self.oeprators = operators - self.results = results - - def __repr__(self) -> str: - return f"ShuntingYardAlgorithmResults({self.results}, ops={self.oeprators})" - - -class Identifier(Expr): - def __init__(self, value: str, for_assignment: bool = False) -> None: - super().__init__() - self.value = value - self.for_assignment = for_assignment - - def __repr__(self) -> str: - return f"Ident({self.value})" - - -class Tuple(Expr): - def __init__(self, values: list[Expr]) -> None: - super().__init__() - self.values = values - - def __repr__(self) -> str: - return f"Tuple({self.values})" - - -class Parameter(Expr): - def __repr__(self) -> str: - return f"Parameter" - - -class Lambda(Expr): - def __init__(self, parameters: SymbolTable, body: Expr) -> None: - super().__init__() - self.parameters = parameters - self.body = body - - def __repr__(self) -> str: - return f"Lambda(P={self.parameters},B={self.body})" - - -class Assignment(Expr): - def __init__(self, left: Expr, right: Expr) -> None: - super().__init__() - self.left = left - self.right = right - - def __repr__(self) -> str: - return f"Assignment ({self.left}) -> ({self.right})" - - -class Reference(Expr): - def __init__( - self, - name: str, - belongs_to: int, - symbol_id: int, - copy_val: bool = False, - ) -> None: - super().__init__() - self.name = name - self.symbol_id = symbol_id - self.belongs_to = belongs_to - self.copy_val = copy_val - - def __repr__(self) -> str: - return f"Ref(ST={self.belongs_to}, Ref={self.name}, ID={self.symbol_id})" - - -class Parameter(Expr): - def __repr__(self) -> str: - return f"Parameter" - - -class Literal(Expr): - def __init__(self, literal_ty: Expr, val: any) -> None: - super().__init__(literal_ty) - self.val = val - - def __repr__(self) -> str: - return f"Literal(LTY={self.ty}, V={self.val})" - - -class PrimitiveType(Expr): - def __init__(self, inner: PrimitiveTypes) -> None: - super().__init__() - self.inner = inner - - def __repr__(self) -> str: - return f"PrimitiveType(I={self.inner})" - - -class DataVariantWithInnerValue(Expr): - def __init__(self, name: Expr, inner_value: Expr) -> None: - super().__init__(ty=inner_value) - self.inner_value = inner_value - self.name = name - - def __repr__(self) -> str: - return f"DataVariantWithInnerValue(NAME={self.name}, IV={self.inner_value})" - - -""" -d'Custom_data_type :: int -d'Custom_data_type :: str -d'Custom_data_type :: float -d'Custom_data_type :: () -d'Custom_data_type :: OneVariant -d'Custom_data_type :: OneVariant | TwoVariant -d'Custom_data_type :: VariantWithData(int) -d'Option :: Some(int) | None -""" - - -class CustomDataType(Expr): - def __init__(self, name: Expr, dt: list[Expr]) -> None: - super().__init__(ty=dt) - self.dt = dt - self.name = name - - def __repr__(self) -> str: - return f"CustomDataType(_)" - - -class Parser(Cursor): - def __init__(self, input: list[Token]) -> None: - super().__init__(input) - self.results: list[Expr] = [] - global_symbols = SymbolTable(0) - # TODO: we are waiting for typedef! - global_symbols.insert("int", PrimitiveType(PrimitiveTypes.INT)) - global_symbols.insert("unit", PrimitiveType(PrimitiveTypes.UNIT)) - global_symbols.insert("str", PrimitiveType(PrimitiveTypes.STR)) - global_symbols.insert("float", PrimitiveType(PrimitiveTypes.FLOAT)) - - self.symbol_tables: dict[int, SymbolTable] = {0: global_symbols} - self.using_st: int = 0 - self.parsing_lambda_parameters = False - self.op_stack: list[str] = [] - # temp solution - self.current_number_of_advances = 0 - self.already_parsing_sya = False - - def resolve_type(self, ty: Expr) -> None: - pass - - def advance(self) -> None: - self.current_number_of_advances += 1 - return super().advance() - - def peek(self, amt: int = 1) -> Token | None: - if self.at + amt > len(self.input): - return None - return self.input[self.at + amt] - - def lookup(self, name: str, symbol_table_id: int | None = None) -> Symbol | None: - symbol_table_id = self.using_st if symbol_table_id is None else symbol_table_id - if symbol_table_id is None or symbol_table_id not in self.symbol_tables: - return None - symbol_table = self.symbol_tables[symbol_table_id] - symbol = symbol_table.lookup(name) - if symbol is None and (symbol_table.parent is not None): - return self.lookup(name, symbol_table.parent) - return symbol - - def parse_all(self) -> None: - while c := self.current(): - if c == None: - break - parsed = self.parse() - if parsed is None: - break - self.results.append(parsed) - self.current_number_of_advances = 0 - - def parse(self) -> Expr | None: - c = self.current() - result: Expr | None = None - if c is None: - result = None - elif c.ty == TT.PRIME_FORM: - self.advance() - if (next := self.current()) and next.ty != TT.IDENT: - raise Exception( - f"Expected double colon after the prime form...got {next}" - ) - ident = self.parse() - self.advance() - parts: list[Expr] = [] - while True: - c = self.current() - if c is None: - break - elif c.ty == TT.PIPE: - self.advance() - continue - part = self.parse() - if ( - not isinstance(part, Tuple) - and not isinstance(part, Reference) - and not isinstance(part, PrimitiveType) - and not isinstance(part, DataVariantWithInnerValue) - and ( - not isinstance(part, Identifier) - or (isinstance(part, Identifier) and part.for_assignment) - ) - ): - self.at -= 1 - break - - parts.append(part) - - result = CustomDataType(ident, parts) - elif c.ty == TT.LITERAL: - if c.prim_ty is None or c.val is None: - raise Exception("Invalid primitive type...how?") - self.advance() - result = Literal(PrimitiveType(c.prim_ty), c.val) - elif c.ty == TT.IDENT: - if c.val is None: - raise Exception("Identifier with no value?") - symbol = self.lookup(c.val) - if symbol is not None: - self.advance() - result = Reference(c.val, symbol.belongs_to, symbol.id) - else: - next = self.input[self.at + 1] - if ( - next is not None - and next.ty is TT.IDENT - and self.parsing_lambda_parameters - and self.using_st in self.symbol_tables - ): - self.advance() - sym_table = self.symbol_tables[self.using_st] - expr = self.parse() - if expr is not None and isinstance(expr, Reference): - sym_table.insert(c.val, expr) - result = Parameter() - elif next.ty is TT.OPEN_PAREN: - self.advance() - paren = self.parse() - # raise Exception(f"Enum value(?): {c.val} -> {paren}") - result = DataVariantWithInnerValue(Identifier(c.val), paren) - else: - self.advance() - for_assignment = False - if ( - (c2 := self.current()) - and c2 is not None - and c2.ty == TT.BACKSLASH - ): - for_assignment = True - result = Identifier(c.val, for_assignment) - - elif c.ty == TT.OPEN_PAREN: - self.advance() - the_between: list[Expr] = [] - has_comma: bool = False - while True: - c = self.current() - if c is not None: - if c.ty == TT.CLOSE_PAREN: - self.advance() - break - elif c.ty == TT.COMMA: - self.advance() - has_comma = True - continue - - expr = self.parse() - if expr is None: - self.at -= 1 - break - the_between.append(expr) - if len(the_between) == 0: - # We init Parenthesized with no expression so - # that it is treated as an empty tuple, non value - # or dead value. Its just a placeholder ig? - result = Parenthesized(ty=self.lookup("unit", 0)) - elif len(the_between) == 1: - # Init Parenthesized with an expression (the_between[0]) - # to do exactly what it says... for example (\ :: int ...) - result = Parenthesized(the_between[0]) - elif len(the_between) > 1 and has_comma: - # Handle tuples - result = Tuple(the_between) - - # TODO: handle all function call arg parsing - # function calls can use () but are not required - # all this should be handled down where the - # infix operators are but check if the previous - # expression was a reference, or fn def and - # then match arguments with arguments. - elif c.ty == TT.BACKSLASH: - self.advance() - symbol_table_id = list(self.symbol_tables.items())[-1][0] + 1 - lambda_symbol_table = SymbolTable(symbol_table_id, 0) - self.symbol_tables[symbol_table_id] = lambda_symbol_table - self.using_st = symbol_table_id - self.parsing_lambda_parameters = True - while True: - c = self.current() - if c is not None: - if c.ty == TT.COMMA: - self.advance() - continue - elif c.ty == TT.DOUBLE_COLON: - self.advance() - ret_type = self.parse() - if ret_type is None: - raise Exception( - f"Return type was not there or non identifier ({ret_type})" - ) - lambda_symbol_table.insert("ret", ret_type) - self.symbol_tables[symbol_table_id] = lambda_symbol_table - c = self.current() - if c is None or c.ty is not TT.FUNCTION_ARROW: - raise Exception( - f"Expected f.n. arrow after ret type... ({c})" - ) - self.advance() - break - expr = self.parse() - if expr is None or not isinstance(expr, Parameter): - self.at -= 1 - break - body = self.parse() - if body is None: - raise Exception(f"Lambda must have body {self.current()}") - popped = self.results.pop() - if not isinstance(popped, Identifier): - return popped - _lambda = Lambda(lambda_symbol_table, body) - self.using_st = 0 - self.symbol_tables[self.using_st].insert(popped.value, _lambda) - result = Assignment( - popped, - _lambda, - ) - - # At this point, past previous parsing, we should have advanced past - # the last token and now be face-to-face with the rare, elusive, OP! - c = self.current() - - if ( - isinstance(result, Reference) - and 0 <= result.belongs_to < len(self.symbol_tables) - # and c is not None - ): - st = self.symbol_tables[result.belongs_to] - symbol = st.lookup_by_id(result.symbol_id) - if symbol is None: - raise Exception(f"Unkown symbol reference: {result}") - if isinstance(symbol.val, Lambda): - parameters = symbol.val.parameters - p_len = len(parameters.symbols.keys()) - # if its 1 then it HAS to be the return type...right? - # we can pass on doing anything. Leave the reference - # as is as no further handling is needed. However, - # we will flip the "copy_val" on the reference so later - # steps can quickly handle it - if p_len == 1: - result.copy_val = True - return result - elif p_len > 1: - # NOW we have more arguments so we will want to parse more. - self.current_number_of_advances = 0 - possible_args: list[Expr] = [] - - for k, ref in parameters.symbols.items(): - if ref.name == "ret": - continue - elif not isinstance(ref.val, Reference): - break - type_symbol = self.lookup(ref.val.name, ref.val.belongs_to) - if type_symbol is None or not isinstance( - type_symbol.val, PrimitiveType - ): - break - possible_arg = self.parse() - if ( - possible_arg is None - or possible_arg.ty is None - or not isinstance(possible_arg.ty, Symbol) - ): - print( - f"Null type? 1. {possible_arg is None} 2. {possible_arg.ty is None} 3. {not isinstance(possible_arg.ty, Symbol)} {bcolors.OKCYAN}{bcolors.BOLD}({possible_arg.ty}){bcolors.ENDC}" - ) - break - if isinstance( - possible_arg.ty.val, PrimitiveType - ) and isinstance(type_symbol.val, PrimitiveType): - if possible_arg.ty.val != type_symbol.val: - raise Exception( - f"{bcolors.FAIL}{bcolors.BOLD}Type mismatch{bcolors.ENDC}" - ) - if len(possible_args) == 0: - self.at = self.at - self.current_number_of_advances - self.current_number_of_advances = 0 - return result - - possible_op = get_op(c) - if not check_is_allowed(result) or result is None or self.already_parsing_sya: - return result - if possible_op is None: - return result - - self.advance() - # NOW WE START! begin working with the shunting yard algorithm - # for parsing arithmetic - self.op_stack.append(possible_op[0]) - self.already_parsing_sya = True - res: list[Expr] = [result] - while True: - c = self.current() - possible_op = get_op(c) - if c is None: - break - elif possible_op is not None: - self.op_stack.append(possible_op[0]) - self.advance() - continue - - parsed = self.parse() - if not check_is_allowed(parsed) or parsed is None: - self.at -= 1 - break - res.append(parsed) - - sya_res = ShuntingYardAlgorithmResults(self.op_stack, res) - self.op_stack = [] - self.already_parsing_sya = False - return sya_res # type: ignore diff --git a/types.py b/types.py deleted file mode 100644 index 5b46e35..0000000 --- a/types.py +++ /dev/null @@ -1,22 +0,0 @@ -from ast_1 import Assignment, Expr, Lambda -from enum import Enum, auto - - -class PrimitiveTypes(Enum): - INT = auto() - STR = auto() - FLOAT = auto() - # () - UNIT = auto() - - -class Type(Expr): - def __init__(self, inner: PrimitiveTypes | Expr): - super().__init__(inner) - self.inner = inner - self.is_lambda = isinstance(inner, Lambda) - if isinstance(inner, Assignment): - right = inner.right - if isinstance(right, Lambda): - self.inner = right -# type: ignore From c912873bd71460a5ead1de745a1cbf742fad73ee Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 14 Dec 2024 23:14:48 -0500 Subject: [PATCH 09/15] remove unused imports --chore --- ast_1.py | 9 +++------ ast_exprs.py | 4 ++-- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/ast_1.py b/ast_1.py index 63978c5..4ccb9b1 100644 --- a/ast_1.py +++ b/ast_1.py @@ -1,9 +1,6 @@ -from abc import ABC, abstractmethod -from ast import Expr, Tuple -from os import read -import re -from enum import Enum, auto -from typing import Generic, Callable, Self, Type, TypeVar, Union +from abc import ABC +from ast import Expr +from typing import Generic, Callable, TypeVar from common import TT, PrimitiveTypes, bcolors, operators from ast_exprs import ( diff --git a/ast_exprs.py b/ast_exprs.py index 1e3565c..5acbe54 100644 --- a/ast_exprs.py +++ b/ast_exprs.py @@ -1,6 +1,6 @@ from abc import ABC -from common import TT, PrimitiveTypes, bcolors, operators -from typing import Type, Any, Union +from common import PrimitiveTypes, bcolors +from typing import Any, Union class AstirExpr(ABC): def __init__(self, ty: Union['PrimitiveTypes', 'AstirExpr']): From 49fe7b2723c0ee3d43768849c52f150b0816bbe3 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sun, 15 Dec 2024 18:03:01 -0500 Subject: [PATCH 10/15] asm work --- README.md | 3 +-- asm.py | 42 +++++++++++++++++++++++++++--------------- ast_1.py | 21 +-------------------- boot.py | 7 ++++--- common.py | 19 +++++++++++++++++++ whatever.s | 2 +- 6 files changed, 53 insertions(+), 41 deletions(-) diff --git a/README.md b/README.md index 8c6f539..0509ccb 100644 --- a/README.md +++ b/README.md @@ -10,5 +10,4 @@ - `m'` and `t'` are "prime forms". open issue for naming ideas or concerns! More on these later:tm: -justin@Justins-MBP-2 Cylindropuntia % as -o whatever.o whatever.s -justin@Justins-MBP-2 Cylindropuntia % ld -macosx_version_min 13.0.0 -o whatever whatever.o -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` -e _start -arch arm64 \ No newline at end of file +as -o whatever.o whatever.s && ld -macosx_version_min 13.0.0 -o whatever whatever.o -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` -e main -arch arm64 \ No newline at end of file diff --git a/asm.py b/asm.py index d52eb95..2fd7c48 100644 --- a/asm.py +++ b/asm.py @@ -1,18 +1,30 @@ -# from ast import Assignment, Cursor, Expr, Identifier, Lambda, Token # type: ignore +from ast_exprs import Assignment, AstirExpr, Identifier, Lambda, Symbol # type: ignore +from common import Cursor -# class ASM(Cursor): -# def __init__(self, input: list[Expr]) -> None: -# super().__init__(input) -# self.lines: list[str] = [".p2align 3"] +class ASM(Cursor): + def __init__(self, input: list[AstirExpr]) -> None: + super().__init__(input) + self.lines: list[str] = [".global _start", ".p2align 3"] -# def generate(self) -> None: -# c_expr = self.current() -# if isinstance(c_expr, Assignment) and isinstance(c_expr.left, Identifier): -# name = c_expr.left.value -# if isinstance(c_expr.right, Lambda): -# print(f"Handling lambda assignment {c_expr.left}.") -# self.lines.append(f"{name}:") -# return None -# print(f"!! {c_expr}") -# return None \ No newline at end of file + def generate(self) -> None: + c_expr = self.current() + if isinstance(c_expr, Assignment): + if isinstance(c_expr.right, Lambda) and isinstance(c_expr.left, Identifier): + lambda_def = c_expr.right.definition + parameters_to_registers: dict[str, str] = {} + last_reg = 0 + for k in lambda_def.parameters.symbols.keys(): + parameter = lambda_def.parameters.symbols.get(k) + if parameter is None or not isinstance(parameter, Symbol): + break + elif parameter.name == "ret": + continue + parameters_to_registers[parameter.name] = f"X{last_reg}" + last_reg += 1 + # -1 to get rid of the return type. the length in the name may be temporary :) + self.lines.append( + f"{c_expr.left.value}_{len(lambda_def.parameters.symbols.keys())-1}:" + ) + pass + return None diff --git a/ast_1.py b/ast_1.py index 4ccb9b1..e701e4d 100644 --- a/ast_1.py +++ b/ast_1.py @@ -2,7 +2,7 @@ from ast import Expr from typing import Generic, Callable, TypeVar -from common import TT, PrimitiveTypes, bcolors, operators +from common import TT, Cursor, PrimitiveTypes, bcolors, operators from ast_exprs import ( AstirExpr, ShuntingYardAlgorithmResults, @@ -50,25 +50,6 @@ def get_op(possible_op: Token | None) -> tuple[str, dict[str, int]] | None: def is_valid_ident(c: str) -> bool: return c.isalnum() or c == "_" - -T = TypeVar("T") - - -class Cursor(ABC, Generic[T]): - def __init__(self, input: list[T]) -> None: - super().__init__() - self.input = input - self.at = 0 - - def advance(self) -> None: - self.at += 1 - - def current(self) -> T | None: - if self.at >= len(self.input): - return None - return self.input[self.at] - - class Lexer(Cursor): def __init__(self, input: str) -> None: super().__init__(list(input)) diff --git a/boot.py b/boot.py index dfa62b6..8ecdce4 100644 --- a/boot.py +++ b/boot.py @@ -1,3 +1,4 @@ +from asm import ASM from ast_1 import Parser, Lexer # type: ignore #from asm import ASM # type: ignore @@ -11,9 +12,9 @@ def run(): parser = Parser(lexer.results) parser.parse_all() print(f"{parser.results}\n\n") - # code_generator = ASM(parser.results) - # code_generator.generate() - # print(f"{code_generator.lines}") + code_generator = ASM(parser.results) + code_generator.generate() + print(f"{code_generator.lines}") if __name__ == "__main__": diff --git a/common.py b/common.py index b9be42e..1479f96 100644 --- a/common.py +++ b/common.py @@ -1,4 +1,6 @@ +from abc import ABC from enum import Enum, auto +from typing import Generic, TypeVar # https://stackoverflow.com/questions/287871/how-do-i-print-colored-text-to-the-terminal @@ -54,3 +56,20 @@ class TT(Enum): "associativity": 0, }, } + +T = TypeVar("T") + + +class Cursor(ABC, Generic[T]): + def __init__(self, input: list[T]) -> None: + super().__init__() + self.input = input + self.at = 0 + + def advance(self) -> None: + self.at += 1 + + def current(self) -> T | None: + if self.at >= len(self.input): + return None + return self.input[self.at] diff --git a/whatever.s b/whatever.s index fa49937..f378b04 100644 --- a/whatever.s +++ b/whatever.s @@ -5,7 +5,7 @@ // X16 - linux function number // .global _start // Provide program starting address to linker -.p2align 3 // Feedback from Peter +.p2align 3 _return_int: add X1, X0, 2 From f1c4e0395f438ed80e5b1e045ea70098abe89a61 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Thu, 19 Dec 2024 16:11:46 -0500 Subject: [PATCH 11/15] gitignore --- .gitignore | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.gitignore b/.gitignore index 0349727..30dbf9e 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,9 @@ Main *.o *.hi __pycache__ +whatever +whatever.s +*.s +*.o +boot whatever \ No newline at end of file From a6d0cc0b58df0ac78deddaf0fbfa32f0585d7f64 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Thu, 19 Dec 2024 16:11:57 -0500 Subject: [PATCH 12/15] assembly now --- README.md | 2 +- asm.py | 92 +++++++++++++++++++++++++++++++++++++++------------- ast_1.py | 6 ++-- ast_exprs.py | 2 +- boot.dal | 2 +- boot.py | 9 +++-- whatever.s | 68 +++++++------------------------------- 7 files changed, 94 insertions(+), 87 deletions(-) diff --git a/README.md b/README.md index 0509ccb..e6f125c 100644 --- a/README.md +++ b/README.md @@ -10,4 +10,4 @@ - `m'` and `t'` are "prime forms". open issue for naming ideas or concerns! More on these later:tm: -as -o whatever.o whatever.s && ld -macosx_version_min 13.0.0 -o whatever whatever.o -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` -e main -arch arm64 \ No newline at end of file +as -o whatever.o whatever.s && ld -macosx_version_min 13.0.0 -o whatever whatever.o -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` -e _start -arch arm64 \ No newline at end of file diff --git a/asm.py b/asm.py index 2fd7c48..b534ca2 100644 --- a/asm.py +++ b/asm.py @@ -1,30 +1,78 @@ -from ast_exprs import Assignment, AstirExpr, Identifier, Lambda, Symbol # type: ignore -from common import Cursor +from ast_exprs import Assignment, AstirExpr, Identifier, Lambda, Literal, Reference, ShuntingYardAlgorithmResults, Symbol, SymbolTable # type: ignore +from common import Cursor, PrimitiveTypes class ASM(Cursor): - def __init__(self, input: list[AstirExpr]) -> None: + def __init__( + self, input: list[AstirExpr], symbol_tables: dict[int, SymbolTable] + ) -> None: super().__init__(input) - self.lines: list[str] = [".global _start", ".p2align 3"] + self.lines: list[str] = [".global main", ".p2align 3"] + self.symbol_tables: dict[int, SymbolTable] = symbol_tables + # Format (ref_id, register) + self.ref_id_and_register: list[tuple[int, int]] = [] + self.current_usable_register = 0 - def generate(self) -> None: - c_expr = self.current() + def lookup_symbol(self, symbol_table: int, symbol_id: int) -> Symbol | None: + if symbol_table not in self.symbol_tables: + return None + _symbol_table: SymbolTable | None = self.symbol_tables[symbol_table] + if _symbol_table is None: + return None + symbol = _symbol_table.lookup_by_id(symbol_id) + return symbol + + def generate(self, expr: AstirExpr | None = None) -> list[str]: + c_expr = self.current() if expr is None else expr + to_add: list[str] = [] if isinstance(c_expr, Assignment): if isinstance(c_expr.right, Lambda) and isinstance(c_expr.left, Identifier): - lambda_def = c_expr.right.definition - parameters_to_registers: dict[str, str] = {} - last_reg = 0 - for k in lambda_def.parameters.symbols.keys(): - parameter = lambda_def.parameters.symbols.get(k) - if parameter is None or not isinstance(parameter, Symbol): - break - elif parameter.name == "ret": - continue - parameters_to_registers[parameter.name] = f"X{last_reg}" - last_reg += 1 - # -1 to get rid of the return type. the length in the name may be temporary :) - self.lines.append( - f"{c_expr.left.value}_{len(lambda_def.parameters.symbols.keys())-1}:" + to_add.append(f"{c_expr.left.value}:") + print(f"Being added: {to_add}") + generated_body = self.generate(c_expr.right.body) + print(f"Generated body -> {generated_body}") + to_add.extend(generated_body) + to_add.append("ret") + self.lines.extend(to_add) + elif isinstance(c_expr, Reference): + symbol = self.lookup_symbol(c_expr.belongs_to, c_expr.symbol_id) + if symbol is None: + raise Exception("failed to lookup symbol") + if isinstance(symbol.val, Reference): + symbol2 = self.lookup_symbol( + symbol.val.belongs_to, symbol.val.symbol_id ) - pass - return None + register = self.current_usable_register + self.ref_id_and_register.append((symbol.val.symbol_id, register)) + to_add.append(f"x{register}") # Temp + self.current_usable_register += 1 + elif isinstance(c_expr, ShuntingYardAlgorithmResults): + if len(c_expr.oeprators) > 0: + raise Exception("Invalid shunting yard algorithm") + stack: list[str] = [] + c_expr.results.reverse() + while len(c_expr.results) > 0 and (term := c_expr.results.pop()): + # TODO: make some like class method or something + # to make this cleaner?? + if isinstance(term, Reference): + stack.extend(self.generate(term)) + elif isinstance(term, Literal): + if term.ty != PrimitiveTypes.INT: + raise Exception("Unexpected type.") + stack.append(str(term.val)) + elif isinstance(term, str): + if term == "+": + stack.reverse() + (item1, item2) = (stack.pop(), stack.pop()) + if not item1.startswith("x"): + register = self.current_usable_register + to_add.append(f"mov x{register}, {item1}") + item1 = f"x{register}" + self.current_usable_register += 1 + print( + f"Adding last two items on stack: {item1}, {item2} = {item1 + item2}" + ) + to_add.append(f"add x0, {item1}, {item2}") + + print(f"{c_expr}") + return to_add diff --git a/ast_1.py b/ast_1.py index e701e4d..4d4191b 100644 --- a/ast_1.py +++ b/ast_1.py @@ -488,7 +488,7 @@ def parse(self) -> AstirExpr | None: # for parsing arithmetic self.op_stack.append(possible_op[0]) self.already_parsing_sya = True - res: list[AstirExpr] = [result] + res: list[AstirExpr | str] = [result] while True: c = self.current() possible_op = get_op(c) @@ -504,8 +504,8 @@ def parse(self) -> AstirExpr | None: self.at -= 1 break res.append(parsed) - - sya_res = ShuntingYardAlgorithmResults(self.op_stack, res) + res.extend(self.op_stack) self.op_stack = [] + sya_res = ShuntingYardAlgorithmResults(self.op_stack, res) self.already_parsing_sya = False return sya_res # type: ignore diff --git a/ast_exprs.py b/ast_exprs.py index 5acbe54..bd8fa6e 100644 --- a/ast_exprs.py +++ b/ast_exprs.py @@ -108,7 +108,7 @@ def __repr__(self) -> str: class ShuntingYardAlgorithmResults(AstirExpr): - def __init__(self, operators: list[str], results: list[AstirExpr]) -> None: + def __init__(self, operators: list[str], results: list[AstirExpr | str]) -> None: super().__init__(PrimitiveTypes.UNIT) self.oeprators = operators self.results = results diff --git a/boot.dal b/boot.dal index 45daa56..9787485 100644 --- a/boot.dal +++ b/boot.dal @@ -1,3 +1,3 @@ take_int_and_add_2\ x int :: int → x + 2 -main\ :: int → take_int_and_add_2 2 \ No newline at end of file +_start\ :: int → take_int_and_add_2 2 \ No newline at end of file diff --git a/boot.py b/boot.py index 8ecdce4..79ed381 100644 --- a/boot.py +++ b/boot.py @@ -1,7 +1,7 @@ from asm import ASM -from ast_1 import Parser, Lexer # type: ignore +from ast_1 import Parser, Lexer # type: ignore -#from asm import ASM # type: ignore +# from asm import ASM # type: ignore def run(): @@ -12,9 +12,12 @@ def run(): parser = Parser(lexer.results) parser.parse_all() print(f"{parser.results}\n\n") - code_generator = ASM(parser.results) + code_generator = ASM(parser.results, parser.symbol_tables) + code_generator.generate() + code_generator.advance() code_generator.generate() print(f"{code_generator.lines}") + open("boot.s", "w+").write("\n".join(code_generator.lines)) if __name__ == "__main__": diff --git a/whatever.s b/whatever.s index f378b04..66b5791 100644 --- a/whatever.s +++ b/whatever.s @@ -1,70 +1,26 @@ -// Assembler program to print "Hello World!" -// to stdout. -// -// X0-X2 - parameters to linux function services -// X16 - linux function number -// -.global _start // Provide program starting address to linker +.global _start .p2align 3 _return_int: -add X1, X0, 2 -mov X0, X1 +add X0, X0, 2 ret _start: mov X0, 2 - bl _return_int // This take the 2 from X0 and adds 2 to it - cmp X0, 4 // We want to check if the rseult from _return_in - // is eq to 4 + bl _return_int + cmp X0, 4 b.eq print_hello_world - mov X0, #0 // Use 0 return code - mov X16, #1 // Service command code 1 terminates this program - svc 0 // Call MacOS to terminate the program + mov X0, #0 + mov X16, #1 + svc 0 -// Setup the parameters to print hello world -// and then call Linux to do it. -print_hello_world: mov X0, #1 // 1 = StdOut - adr X1, helloworld // string to print - mov X2, #13 // length of our string - mov X16, #4 // MacOS write system call - svc 0 // Call linux to output the string +print_hello_world: mov X0, #1 + adr X1, helloworld + mov X2, #13 + mov X16, #4 + svc 0 ret -; print_goodbye: mov X0, #1 // 1 = StdOut -; adr X1, goodbyeworld // string to print -; mov X2, #13 // length of our string -; mov X16, #4 // MacOS write system call -; svc 0 // Call linux to output the string -; ret - -; _start: MOV X10, 1 -; MOV X11, 4 -; MOV X12, helloworld -; MOV X13, goodbyeworld -; ADD X9, X10, X11 -; CMP X9, 3 -; csel X12, X14, X13, EQ - -; ; B.EQ print_hello_world -; ; B.NE print_goodbye - -; mov X0, #1 // 1 = StdOut -; adr X1, X14 // string to print` -; mov X2, #13 // length of our string -; mov X16, #4 // MacOS write system call -; svc 0 // Call linux to output the string -/* -mov X0, #1 // 1 = StdOut - adr X1, 123 // string to print - mov X2, #3 // length of our string - mov X16, #4 // MacOS write system call - svc 0 // Call linux to output the string - -*/ -// Setup the parameters to exit the program -// and then call Linux to do it. - helloworld: .ascii "Hello World!\n" // 13 \ No newline at end of file From 4b914e5d81609e54ad31a5470f7ef57001a7987a Mon Sep 17 00:00:00 2001 From: ibx34 Date: Fri, 20 Dec 2024 11:56:37 -0500 Subject: [PATCH 13/15] awalys working --- README.md | 2 +- asm.py | 53 +++++++++++++++++++++++++++++++++++++++++++++++----- ast_1.py | 5 +++-- ast_exprs.py | 3 ++- boot.py | 4 +--- 5 files changed, 55 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index e6f125c..c48d4b7 100644 --- a/README.md +++ b/README.md @@ -10,4 +10,4 @@ - `m'` and `t'` are "prime forms". open issue for naming ideas or concerns! More on these later:tm: -as -o whatever.o whatever.s && ld -macosx_version_min 13.0.0 -o whatever whatever.o -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` -e _start -arch arm64 \ No newline at end of file +as -o boot.o boot.s && ld -macosx_version_min 13.0.0 -o boot boot.o -lSystem -syslibroot `xcrun -sdk macosx --show-sdk-path` -e _start -arch arm64 \ No newline at end of file diff --git a/asm.py b/asm.py index b534ca2..818db6e 100644 --- a/asm.py +++ b/asm.py @@ -1,18 +1,35 @@ -from ast_exprs import Assignment, AstirExpr, Identifier, Lambda, Literal, Reference, ShuntingYardAlgorithmResults, Symbol, SymbolTable # type: ignore +from ast_exprs import Application, Assignment, AstirExpr, Identifier, Lambda, Literal, Reference, ShuntingYardAlgorithmResults, Symbol, SymbolTable # type: ignore from common import Cursor, PrimitiveTypes +class Register: + def __init__(self, reg: int, being_used_by: int): + self.being_used_by = being_used_by + self.reg = reg + + class ASM(Cursor): def __init__( self, input: list[AstirExpr], symbol_tables: dict[int, SymbolTable] ) -> None: super().__init__(input) - self.lines: list[str] = [".global main", ".p2align 3"] + self.lines: list[str] = [".global _start", ".p2align 3"] self.symbol_tables: dict[int, SymbolTable] = symbol_tables # Format (ref_id, register) self.ref_id_and_register: list[tuple[int, int]] = [] + self.fn_register_man: dict[int, list[tuple[int, int]]] = {} self.current_usable_register = 0 + # This should be cleared after every generate call. + self.registers_in_use: list[Register] = [] + + def generate_all(self): + while self.current() is not None: + generated = self.generate() + self.lines.extend(generated) + self.advance() + self.registers_in_use = [] + def lookup_symbol(self, symbol_table: int, symbol_id: int) -> Symbol | None: if symbol_table not in self.symbol_tables: return None @@ -27,13 +44,40 @@ def generate(self, expr: AstirExpr | None = None) -> list[str]: to_add: list[str] = [] if isinstance(c_expr, Assignment): if isinstance(c_expr.right, Lambda) and isinstance(c_expr.left, Identifier): + symbols = c_expr.right.definition.parameters.symbols + + # Reserve registers for the arguments!!! + def reserve_register(x: int) -> tuple[int, int]: + ret = (x, self.current_usable_register) + self.current_usable_register += 1 + return ret + + reserved_registers: list[tuple[int, int]] = list( + map(reserve_register, symbols) + )[:-1] + self.fn_register_man[c_expr.right.belongs_to] = reserved_registers to_add.append(f"{c_expr.left.value}:") - print(f"Being added: {to_add}") + print(f"Being added: {c_expr.right.definition}") generated_body = self.generate(c_expr.right.body) print(f"Generated body -> {generated_body}") to_add.extend(generated_body) to_add.append("ret") - self.lines.extend(to_add) + elif isinstance(c_expr, Application): + reserved_registers = self.fn_register_man[c_expr.lambda_ref.symbol_id] + for idx, v in enumerate(reserved_registers): + if not (0 <= idx < len(c_expr.parameters)): + raise Exception(f"Invalid application") + param: AstirExpr = c_expr.parameters[idx] + if not isinstance(param, Literal) or param.ty != PrimitiveTypes.INT: + raise Exception("TODO: HANDLE MORE THAN JUST LITERALS") + to_add.append(f"mov x{v[1]}, {param.val}") + + symbol = self.lookup_symbol( + c_expr.lambda_ref.belongs_to, c_expr.lambda_ref.symbol_id + ) + if symbol is None: + raise Exception(f"failed to find symbol {c_expr.lambda_ref.symbol_id}") + to_add.append(f"bl {symbol.name}") elif isinstance(c_expr, Reference): symbol = self.lookup_symbol(c_expr.belongs_to, c_expr.symbol_id) if symbol is None: @@ -45,7 +89,6 @@ def generate(self, expr: AstirExpr | None = None) -> list[str]: register = self.current_usable_register self.ref_id_and_register.append((symbol.val.symbol_id, register)) to_add.append(f"x{register}") # Temp - self.current_usable_register += 1 elif isinstance(c_expr, ShuntingYardAlgorithmResults): if len(c_expr.oeprators) > 0: raise Exception("Invalid shunting yard algorithm") diff --git a/ast_1.py b/ast_1.py index 4d4191b..2341370 100644 --- a/ast_1.py +++ b/ast_1.py @@ -400,9 +400,10 @@ def parse(self) -> AstirExpr | None: popped = self.results.pop() if not isinstance(popped, Identifier): return popped - _lambda = Lambda(lambda_symbol_table, body) self.using_st = 0 - self.symbol_tables[self.using_st].insert(popped.value, _lambda) + symbol_table = self.symbol_tables[self.using_st] + _lambda = Lambda(lambda_symbol_table, body, symbol_table.last_id + 1) + symbol_table.insert(popped.value, _lambda) result = Assignment( popped, _lambda, diff --git a/ast_exprs.py b/ast_exprs.py index bd8fa6e..a1431d0 100644 --- a/ast_exprs.py +++ b/ast_exprs.py @@ -89,10 +89,11 @@ def __repr__(self): class Lambda(AstirExpr): - def __init__(self, parameters: SymbolTable, body: AstirExpr): + def __init__(self, parameters: SymbolTable, body: AstirExpr, belongs_to: int): lambda_def = LambdaDefinition(parameters) super().__init__(lambda_def) self.definition = lambda_def + self.belongs_to = belongs_to self.body = body def __repr__(self): diff --git a/boot.py b/boot.py index 79ed381..210eee3 100644 --- a/boot.py +++ b/boot.py @@ -13,9 +13,7 @@ def run(): parser.parse_all() print(f"{parser.results}\n\n") code_generator = ASM(parser.results, parser.symbol_tables) - code_generator.generate() - code_generator.advance() - code_generator.generate() + code_generator.generate_all() print(f"{code_generator.lines}") open("boot.s", "w+").write("\n".join(code_generator.lines)) From 67ae2de198eb0fb5c25287ead27a7f7e60c61dd7 Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 21 Dec 2024 09:25:04 -0500 Subject: [PATCH 14/15] work --- asm.py | 170 +++++++++++++++++++++++++++------------------------ ast_1.py | 7 ++- ast_exprs.py | 16 ++--- 3 files changed, 102 insertions(+), 91 deletions(-) diff --git a/asm.py b/asm.py index 818db6e..9a73349 100644 --- a/asm.py +++ b/asm.py @@ -1,13 +1,8 @@ +from abc import ABC from ast_exprs import Application, Assignment, AstirExpr, Identifier, Lambda, Literal, Reference, ShuntingYardAlgorithmResults, Symbol, SymbolTable # type: ignore from common import Cursor, PrimitiveTypes -class Register: - def __init__(self, reg: int, being_used_by: int): - self.being_used_by = being_used_by - self.reg = reg - - class ASM(Cursor): def __init__( self, input: list[AstirExpr], symbol_tables: dict[int, SymbolTable] @@ -15,20 +10,26 @@ def __init__( super().__init__(input) self.lines: list[str] = [".global _start", ".p2align 3"] self.symbol_tables: dict[int, SymbolTable] = symbol_tables - # Format (ref_id, register) - self.ref_id_and_register: list[tuple[int, int]] = [] - self.fn_register_man: dict[int, list[tuple[int, int]]] = {} - self.current_usable_register = 0 + # All tables related to registers most likely + # have keys that map to a Symbol's id. + # The inner dictionary holds the parameter index -> register + # map. TODO: introduce a way to recognize different + # register sizes for a64 (rn) + self.fn_register_store: dict[int, dict[int, int]] = {} + self.inside_fn: int | None = None + # # Format (ref_id, register) + # self.ref_id_and_register: list[tuple[int, int]] = [] + # self.fn_register_man: dict[int, list[tuple[int, int]]] = {} + # self.current_usable_register = 0 - # This should be cleared after every generate call. - self.registers_in_use: list[Register] = [] + # # This should be cleared after every generate call. + # self.registers_in_use: list[Register] = [] def generate_all(self): while self.current() is not None: generated = self.generate() self.lines.extend(generated) self.advance() - self.registers_in_use = [] def lookup_symbol(self, symbol_table: int, symbol_id: int) -> Symbol | None: if symbol_table not in self.symbol_tables: @@ -45,77 +46,84 @@ def generate(self, expr: AstirExpr | None = None) -> list[str]: if isinstance(c_expr, Assignment): if isinstance(c_expr.right, Lambda) and isinstance(c_expr.left, Identifier): symbols = c_expr.right.definition.parameters.symbols + last_used_register = 0 + lambda_param_to_register: dict[int, int] = {} + for symbol_idx in symbols: + symbol = symbols[symbol_idx] + if symbol.name == "ret": + # We break instead of continue because "ret" + # should always be the last item in the dict + break + lambda_param_to_register[symbol_idx] = last_used_register + last_used_register += 1 - # Reserve registers for the arguments!!! - def reserve_register(x: int) -> tuple[int, int]: - ret = (x, self.current_usable_register) - self.current_usable_register += 1 - return ret - - reserved_registers: list[tuple[int, int]] = list( - map(reserve_register, symbols) - )[:-1] - self.fn_register_man[c_expr.right.belongs_to] = reserved_registers - to_add.append(f"{c_expr.left.value}:") - print(f"Being added: {c_expr.right.definition}") - generated_body = self.generate(c_expr.right.body) - print(f"Generated body -> {generated_body}") - to_add.extend(generated_body) + self.fn_register_store[c_expr.right.symbol_id] = lambda_param_to_register + # This is so we can parse and get the correct arguments + self.inside_fn = c_expr.right.symbol_id + to_add.append(f"{c_expr.left.value}: // Symbol ID: {c_expr.right.symbol_id}") + to_add.extend(self.generate(c_expr.right.body)) to_add.append("ret") - elif isinstance(c_expr, Application): - reserved_registers = self.fn_register_man[c_expr.lambda_ref.symbol_id] - for idx, v in enumerate(reserved_registers): - if not (0 <= idx < len(c_expr.parameters)): - raise Exception(f"Invalid application") - param: AstirExpr = c_expr.parameters[idx] - if not isinstance(param, Literal) or param.ty != PrimitiveTypes.INT: - raise Exception("TODO: HANDLE MORE THAN JUST LITERALS") - to_add.append(f"mov x{v[1]}, {param.val}") - - symbol = self.lookup_symbol( - c_expr.lambda_ref.belongs_to, c_expr.lambda_ref.symbol_id - ) - if symbol is None: - raise Exception(f"failed to find symbol {c_expr.lambda_ref.symbol_id}") - to_add.append(f"bl {symbol.name}") elif isinstance(c_expr, Reference): - symbol = self.lookup_symbol(c_expr.belongs_to, c_expr.symbol_id) - if symbol is None: - raise Exception("failed to lookup symbol") - if isinstance(symbol.val, Reference): - symbol2 = self.lookup_symbol( - symbol.val.belongs_to, symbol.val.symbol_id - ) - register = self.current_usable_register - self.ref_id_and_register.append((symbol.val.symbol_id, register)) - to_add.append(f"x{register}") # Temp + pass elif isinstance(c_expr, ShuntingYardAlgorithmResults): - if len(c_expr.oeprators) > 0: - raise Exception("Invalid shunting yard algorithm") - stack: list[str] = [] - c_expr.results.reverse() - while len(c_expr.results) > 0 and (term := c_expr.results.pop()): - # TODO: make some like class method or something - # to make this cleaner?? - if isinstance(term, Reference): - stack.extend(self.generate(term)) - elif isinstance(term, Literal): - if term.ty != PrimitiveTypes.INT: - raise Exception("Unexpected type.") - stack.append(str(term.val)) - elif isinstance(term, str): - if term == "+": - stack.reverse() - (item1, item2) = (stack.pop(), stack.pop()) - if not item1.startswith("x"): - register = self.current_usable_register - to_add.append(f"mov x{register}, {item1}") - item1 = f"x{register}" - self.current_usable_register += 1 - print( - f"Adding last two items on stack: {item1}, {item2} = {item1 + item2}" - ) - to_add.append(f"add x0, {item1}, {item2}") + if self.inside_fn is None: + raise Exception() + print(f"CURRENT FN: {self.inside_fn} AND ITS PARAMS: {self.fn_register_store[self.inside_fn]}") + pass + # elif isinstance(c_expr, Application): + # reserved_registers = self.fn_register_man[c_expr.lambda_ref.symbol_id] + # for idx, v in enumerate(reserved_registers): + # if not (0 <= idx < len(c_expr.parameters)): + # raise Exception(f"Invalid application") + # param: AstirExpr = c_expr.parameters[idx] + # if not isinstance(param, Literal) or param.ty != PrimitiveTypes.INT: + # raise Exception("TODO: HANDLE MORE THAN JUST LITERALS") + # to_add.append(f"mov x{v[1]}, {param.val}") + + # symbol: Symbol | None = self.lookup_symbol( + # c_expr.lambda_ref.belongs_to, c_expr.lambda_ref.symbol_id + # ) + # if symbol is None: + # raise Exception(f"failed to find symbol {c_expr.lambda_ref.symbol_id}") + # to_add.append(f"bl {symbol.name}") + # elif isinstance(c_expr, Reference): + # symbol = self.lookup_symbol(c_expr.belongs_to, c_expr.symbol_id) + # if symbol is None: + # raise Exception("failed to lookup symbol") + # if isinstance(symbol.val, Reference): + # symbol2 = self.lookup_symbol( + # symbol.val.belongs_to, symbol.val.symbol_id + # ) + # register = self.current_usable_register + # self.ref_id_and_register.append((symbol.val.symbol_id, register)) + # to_add.append(f"x{register}") # Temp + # elif isinstance(c_expr, ShuntingYardAlgorithmResults): + # if len(c_expr.oeprators) > 0: + # raise Exception("Invalid shunting yard algorithm") + # stack: list[str] = [] + # c_expr.results.reverse() + # while len(c_expr.results) > 0 and (term := c_expr.results.pop()): + # # TODO: make some like class method or something + # # to make this cleaner?? + # if isinstance(term, Reference): + # stack.extend(self.generate(term)) + # elif isinstance(term, Literal): + # if term.ty != PrimitiveTypes.INT: + # raise Exception("Unexpected type.") + # stack.append(str(term.val)) + # elif isinstance(term, str): + # if term == "+": + # stack.reverse() + # (item1, item2) = (stack.pop(), stack.pop()) + # if not item1.startswith("x"): + # register = self.current_usable_register + # to_add.append(f"mov x{register}, {item1}") + # item1 = f"x{register}" + # self.current_usable_register += 1 + # print( + # f"Adding last two items on stack: {item1}, {item2} = {item1 + item2}" + # ) + # to_add.append(f"add x0, {item1}, {item2}") - print(f"{c_expr}") + # print(f"{c_expr}") return to_add diff --git a/ast_1.py b/ast_1.py index 2341370..568abe9 100644 --- a/ast_1.py +++ b/ast_1.py @@ -239,6 +239,7 @@ def parse_all(self) -> None: break self.results.append(parsed) self.current_number_of_advances = 0 + self.using_st = 0 def parse(self) -> AstirExpr | None: c = self.current() @@ -363,6 +364,7 @@ def parse(self) -> AstirExpr | None: # then match arguments with arguments. elif c.ty == TT.BACKSLASH: self.advance() + previous_symbol_table_id = self.using_st symbol_table_id = list(self.symbol_tables.items())[-1][0] + 1 lambda_symbol_table = SymbolTable(symbol_table_id, 0) self.symbol_tables[symbol_table_id] = lambda_symbol_table @@ -400,9 +402,8 @@ def parse(self) -> AstirExpr | None: popped = self.results.pop() if not isinstance(popped, Identifier): return popped - self.using_st = 0 - symbol_table = self.symbol_tables[self.using_st] - _lambda = Lambda(lambda_symbol_table, body, symbol_table.last_id + 1) + symbol_table = self.symbol_tables[previous_symbol_table_id] + _lambda = Lambda(lambda_symbol_table, body, previous_symbol_table_id, symbol_table.usable_id) symbol_table.insert(popped.value, _lambda) result = Assignment( popped, diff --git a/ast_exprs.py b/ast_exprs.py index a1431d0..81d2461 100644 --- a/ast_exprs.py +++ b/ast_exprs.py @@ -39,7 +39,7 @@ class SymbolTable: def __init__(self, id: int, parent: int | None = None) -> None: self.symbols: dict[int, Symbol] = {} self.name_to_id: dict[str, int] = {} - self.last_id = 0 + self.usable_id = 0 self.id = id self.parent = parent @@ -55,10 +55,11 @@ def lookup_by_id(self, id: int) -> Symbol | None: return self.symbols[id] def insert(self, name: str, val: AstirExpr) -> None: - self.last_id += 1 - symbol = Symbol(name, val, self.id, self.last_id) - self.symbols[self.last_id] = symbol - self.name_to_id[name] = self.last_id + symbol = Symbol(name, val, self.id, self.usable_id) + self.symbols[self.usable_id] = symbol + self.name_to_id[name] = self.usable_id + print(f"!!! || {self.usable_id} -> {name}") + self.usable_id += 1 def __repr__(self) -> str: return f"{self.symbols}" @@ -89,13 +90,14 @@ def __repr__(self): class Lambda(AstirExpr): - def __init__(self, parameters: SymbolTable, body: AstirExpr, belongs_to: int): + def __init__(self, parameters: SymbolTable, body: AstirExpr, belongs_to: int, symbol_id: int): lambda_def = LambdaDefinition(parameters) super().__init__(lambda_def) self.definition = lambda_def self.belongs_to = belongs_to self.body = body - + self.symbol_id = symbol_id + def __repr__(self): return f"Lambda(Def={self.definition}, Body={self.body})" From 525805e96e80f89d5d01ac8d0c3a20a413ec714d Mon Sep 17 00:00:00 2001 From: ibx34 Date: Sat, 21 Dec 2024 22:19:04 -0500 Subject: [PATCH 15/15] cleanup --- asm.py | 169 ++++++++++++++++++++++++++++++++------------------- ast_exprs.py | 3 +- 2 files changed, 106 insertions(+), 66 deletions(-) diff --git a/asm.py b/asm.py index 9a73349..1e14f80 100644 --- a/asm.py +++ b/asm.py @@ -1,8 +1,22 @@ from abc import ABC -from ast_exprs import Application, Assignment, AstirExpr, Identifier, Lambda, Literal, Reference, ShuntingYardAlgorithmResults, Symbol, SymbolTable # type: ignore +from ast_exprs import Application, Assignment, AstirExpr, Identifier, Lambda, LambdaDefinition, Literal, Reference, ShuntingYardAlgorithmResults, Symbol, SymbolTable # type: ignore from common import Cursor, PrimitiveTypes +class ASMFunction: + def __init__( + self, param_to_reg: dict[int, int], name_to_param: dict[str, int] + ) -> None: + self.param_to_reg = param_to_reg + self.name_to_param = name_to_param + # The next usable register is calculated by getting + # the last inserted item in the self.param_to_reg + # and by adding 1 to get us to the next x register + self.next_usable_reg = ( + next(reversed(param_to_reg)) + 1 if len(param_to_reg) > 0 else 0 + ) + + class ASM(Cursor): def __init__( self, input: list[AstirExpr], symbol_tables: dict[int, SymbolTable] @@ -15,7 +29,7 @@ def __init__( # The inner dictionary holds the parameter index -> register # map. TODO: introduce a way to recognize different # register sizes for a64 (rn) - self.fn_register_store: dict[int, dict[int, int]] = {} + self.fn_register_store: dict[int, ASMFunction] = {} self.inside_fn: int | None = None # # Format (ref_id, register) # self.ref_id_and_register: list[tuple[int, int]] = [] @@ -25,6 +39,14 @@ def __init__( # # This should be cleared after every generate call. # self.registers_in_use: list[Register] = [] + def is_register_reserved(self, register: int) -> bool: + return False + + def current_fn(self) -> ASMFunction | None: + if self.inside_fn is None: + return None + return self.fn_register_store[self.inside_fn] + def generate_all(self): while self.current() is not None: generated = self.generate() @@ -48,6 +70,7 @@ def generate(self, expr: AstirExpr | None = None) -> list[str]: symbols = c_expr.right.definition.parameters.symbols last_used_register = 0 lambda_param_to_register: dict[int, int] = {} + param_name_to_idx: dict[str, int] = {} for symbol_idx in symbols: symbol = symbols[symbol_idx] if symbol.name == "ret": @@ -55,75 +78,93 @@ def generate(self, expr: AstirExpr | None = None) -> list[str]: # should always be the last item in the dict break lambda_param_to_register[symbol_idx] = last_used_register + param_name_to_idx[symbol.name] = symbol_idx last_used_register += 1 - self.fn_register_store[c_expr.right.symbol_id] = lambda_param_to_register + asm_function = ASMFunction(lambda_param_to_register, param_name_to_idx) + self.fn_register_store[c_expr.right.symbol_id] = asm_function # This is so we can parse and get the correct arguments self.inside_fn = c_expr.right.symbol_id - to_add.append(f"{c_expr.left.value}: // Symbol ID: {c_expr.right.symbol_id}") + to_add.append( + f"{c_expr.left.value}: // Symbol ID: {c_expr.right.symbol_id}" + ) + # to_add.append(f"// {asm_function.next_usable_reg}") to_add.extend(self.generate(c_expr.right.body)) to_add.append("ret") - elif isinstance(c_expr, Reference): - pass + self.inside_fn = None elif isinstance(c_expr, ShuntingYardAlgorithmResults): - if self.inside_fn is None: - raise Exception() - print(f"CURRENT FN: {self.inside_fn} AND ITS PARAMS: {self.fn_register_store[self.inside_fn]}") - pass - # elif isinstance(c_expr, Application): - # reserved_registers = self.fn_register_man[c_expr.lambda_ref.symbol_id] - # for idx, v in enumerate(reserved_registers): - # if not (0 <= idx < len(c_expr.parameters)): - # raise Exception(f"Invalid application") - # param: AstirExpr = c_expr.parameters[idx] - # if not isinstance(param, Literal) or param.ty != PrimitiveTypes.INT: - # raise Exception("TODO: HANDLE MORE THAN JUST LITERALS") - # to_add.append(f"mov x{v[1]}, {param.val}") + inside_fn = self.current_fn() + if inside_fn is None: + raise Exception("Out of place sya...") + if len(c_expr.oeprators) > 0: + raise Exception("Invalid shunting yard algorithm") + stack: list[str] = [] + c_expr.results.reverse() + while len(c_expr.results) > 0 and (term := c_expr.results.pop()): + # TODO: make some like class method or something + # to make this cleaner?? + if isinstance(term, Reference): + stack.extend(self.generate(term)) + elif isinstance(term, Literal): + if term.ty != PrimitiveTypes.INT: + raise Exception("Unexpected type.") + stack.append(str(term.val)) + elif isinstance(term, str): + if term == "+": + stack.reverse() + (item1, item2) = (stack.pop(), stack.pop()) + if not item1.startswith("x"): + register = inside_fn.next_usable_reg + to_add.append(f"mov x{register}, {item1}") + item1 = f"x{register}" + inside_fn.next_usable_reg += 1 + print( + f"Adding last two items on stack: {item1}, {item2} = {item1 + item2}" + ) + to_add.append(f"add x0, {item1}, {item2}") + elif isinstance(c_expr, Reference): + symbol_in_ref: Symbol | None = self.lookup_symbol( + c_expr.belongs_to, c_expr.symbol_id + ) + if symbol_in_ref is None: + raise Exception(f'Failed to lookup referenced symbol "{c_expr.name}"') + if ( + (c_fn := self.current_fn()) + and c_fn is not None + and c_expr.name in c_fn.name_to_param + and c_fn.name_to_param[c_expr.name] in c_fn.param_to_reg + ): + register = c_fn.param_to_reg[c_fn.name_to_param[c_expr.name]] + to_add.append(f"x{register}") + elif isinstance(c_expr, Application): + fn_parameters = self.fn_register_store[c_expr.lambda_ref.symbol_id] + if fn_parameters is None: + raise Exception("Failed to get reserved registers for fn") + elif len(list(fn_parameters.param_to_reg.keys())) != len(c_expr.parameters): + raise Exception("More parameters than reserved registers...") + application_symbol: Symbol | None = self.lookup_symbol( + c_expr.lambda_ref.belongs_to, c_expr.lambda_ref.symbol_id + ) + if application_symbol is None: + raise Exception(f"failed to find symbol {c_expr.lambda_ref.symbol_id}") + elif ( + not isinstance(application_symbol.val, Lambda) + or c_expr.lambda_ref.symbol_id not in self.fn_register_store + ): + raise Exception( + "Expected this symbol to come back to a lambda definition" + ) - # symbol: Symbol | None = self.lookup_symbol( - # c_expr.lambda_ref.belongs_to, c_expr.lambda_ref.symbol_id - # ) - # if symbol is None: - # raise Exception(f"failed to find symbol {c_expr.lambda_ref.symbol_id}") - # to_add.append(f"bl {symbol.name}") - # elif isinstance(c_expr, Reference): - # symbol = self.lookup_symbol(c_expr.belongs_to, c_expr.symbol_id) - # if symbol is None: - # raise Exception("failed to lookup symbol") - # if isinstance(symbol.val, Reference): - # symbol2 = self.lookup_symbol( - # symbol.val.belongs_to, symbol.val.symbol_id - # ) - # register = self.current_usable_register - # self.ref_id_and_register.append((symbol.val.symbol_id, register)) - # to_add.append(f"x{register}") # Temp - # elif isinstance(c_expr, ShuntingYardAlgorithmResults): - # if len(c_expr.oeprators) > 0: - # raise Exception("Invalid shunting yard algorithm") - # stack: list[str] = [] - # c_expr.results.reverse() - # while len(c_expr.results) > 0 and (term := c_expr.results.pop()): - # # TODO: make some like class method or something - # # to make this cleaner?? - # if isinstance(term, Reference): - # stack.extend(self.generate(term)) - # elif isinstance(term, Literal): - # if term.ty != PrimitiveTypes.INT: - # raise Exception("Unexpected type.") - # stack.append(str(term.val)) - # elif isinstance(term, str): - # if term == "+": - # stack.reverse() - # (item1, item2) = (stack.pop(), stack.pop()) - # if not item1.startswith("x"): - # register = self.current_usable_register - # to_add.append(f"mov x{register}, {item1}") - # item1 = f"x{register}" - # self.current_usable_register += 1 - # print( - # f"Adding last two items on stack: {item1}, {item2} = {item1 + item2}" - # ) - # to_add.append(f"add x0, {item1}, {item2}") + function_parameters = application_symbol.val.definition.parameters.symbols + for idx, param in enumerate(c_expr.parameters): + if not (0 <= idx < len(list(fn_parameters.param_to_reg.keys()))): + raise Exception( + "Invalid Application...More parameters than reserved registers" + ) + reserved_register = fn_parameters.param_to_reg[idx] + if not isinstance(param, Literal) or param.ty != PrimitiveTypes.INT: + raise Exception("TODO: HANDLE MORE THAN JUST LITERALS") + to_add.append(f"mov x{reserved_register}, {param.val}") + to_add.append(f"bl {application_symbol.name}") - # print(f"{c_expr}") return to_add diff --git a/ast_exprs.py b/ast_exprs.py index 81d2461..1452a17 100644 --- a/ast_exprs.py +++ b/ast_exprs.py @@ -58,7 +58,6 @@ def insert(self, name: str, val: AstirExpr) -> None: symbol = Symbol(name, val, self.id, self.usable_id) self.symbols[self.usable_id] = symbol self.name_to_id[name] = self.usable_id - print(f"!!! || {self.usable_id} -> {name}") self.usable_id += 1 def __repr__(self) -> str: @@ -97,7 +96,7 @@ def __init__(self, parameters: SymbolTable, body: AstirExpr, belongs_to: int, sy self.belongs_to = belongs_to self.body = body self.symbol_id = symbol_id - + def __repr__(self): return f"Lambda(Def={self.definition}, Body={self.body})"