Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: squash modules into one to enable optimizations #16

Merged
merged 1 commit into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

_internal.pyx
69 changes: 61 additions & 8 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import os
import shutil
import sys
import ast
from pathlib import Path

# setuptools *must* come before Cython, otherwise Cython's distutils hacking
Expand All @@ -16,17 +17,69 @@
BUILD_DIR = Path("cython_build")


extensions = [
Extension("koerce.annots", ["koerce/annots.py"]),
Extension("koerce.builders", ["koerce/builders.py"]),
Extension("koerce.patterns", ["koerce/patterns.py"]),
# Extension("koerce.utils", ["koerce/utils.py"]),
]
def extract_imports_and_code(path):
    """Split a Python source file into its top-level imports and other code.

    Parameters
    ----------
    path
        Path-like object exposing ``open`` and ``name``; the file it points to
        is parsed with :func:`ast.parse`.

    Returns
    -------
    tuple
        ``(imports, code)`` where ``imports`` holds the top-level
        ``ast.Import`` / ``ast.ImportFrom`` nodes and ``code`` holds every
        other top-level node, each list in source order.

    """
    with path.open("r") as file:
        source = file.read()
    module = ast.parse(source, filename=path.name)

    # Only top-level statements are inspected; nested imports stay in place
    # inside the node that contains them.
    import_types = (ast.Import, ast.ImportFrom)
    imports = [node for node in module.body if isinstance(node, import_types)]
    code = [node for node in module.body if not isinstance(node, import_types)]
    return imports, code


def ignore_import(imp, modules):
    """Tell whether an import statement refers to one of the squashed modules.

    Such imports must be dropped from the concatenated file because the
    modules they point at are merged into that same file.

    Parameters
    ----------
    imp
        An ``ast.Import`` or ``ast.ImportFrom`` node.
    modules
        Bare names of the modules being squashed, e.g. ``"patterns"``.

    Returns
    -------
    bool
        ``True`` if the import targets one of ``modules``, either as
        ``from <module> import ...`` / ``from .<module> import ...`` or as
        ``import koerce.<module>``.

    Raises
    ------
    TypeError
        If ``imp`` is neither an ``ast.Import`` nor an ``ast.ImportFrom``.

    """
    if isinstance(imp, ast.ImportFrom):
        # For ``from .patterns import X`` the node's ``module`` is "patterns";
        # for ``from . import X`` it is ``None`` and never matches.
        return imp.module in modules
    elif isinstance(imp, ast.Import):
        # The f-prefix is required: without it every entry is the literal
        # text "koerce.{name}" and no absolute import is ever recognised.
        absolute_names = [f"koerce.{name}" for name in modules]
        # Only the first alias of the statement is examined.
        return imp.names[0].name in absolute_names
    else:
        raise TypeError(imp)


def concatenate_files(file_paths, output_file):
    """Merge several source files from ``SOURCE_DIR`` into one output file.

    The imports of all inputs are deduplicated and written first, skipping
    those that refer to the merged modules themselves; the remaining
    top-level code of every input follows in the order given.

    Parameters
    ----------
    file_paths
        File names relative to ``SOURCE_DIR``, in concatenation order.
    output_file
        Name of the file to write, relative to ``SOURCE_DIR``.

    """
    modules = []
    body_nodes = []
    # Keyed by the unparsed source text so that textually identical imports
    # collapse into one entry while keeping first-seen order.
    unique_imports = {}

    for file_path in file_paths:
        path = Path(SOURCE_DIR / file_path)
        imports, code = extract_imports_and_code(path)
        for stmt in imports:
            unique_imports[ast.unparse(stmt)] = stmt
        body_nodes.extend(code)
        modules.append(path.stem)

    with (SOURCE_DIR / output_file).open("w") as out:
        # Imports first, minus the ones pointing at the merged modules.
        for text, stmt in unique_imports.items():
            if ignore_import(stmt, modules):
                continue
            out.write(f"{text}\n")

        # Then every remaining top-level statement, separated by blank lines.
        for node in body_nodes:
            out.write(ast.unparse(node) + "\n\n\n")


# Squash the pure-Python modules into a single .pyx file and build that one
# file as the only extension module.
concatenate_files(["builders.py", "patterns.py", "annots.py"], "_internal.pyx")
extension = Extension("koerce._internal", ["koerce/_internal.pyx"])
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't able to cimport from pure-Python cythonized modules, even with augmenting .pxd files, so I started to move over to pure Cython syntax. However, the development time increased, which made me value fast iterations during development even more.
So I had to come up with a solution that keeps the implementation split into modules (easier maintenance) while still benefiting from the performance gains. The solution was to squash all the Python files into a single one before cythonizing; for this to work, the internal imports must be removed.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess we'll see whether this becomes a burden long term. For now, the solution seems good!

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is already a feature request on cython's side, hopefully this will be fixed in the meantime cython/cython#4892


cythonized_modules = cythonize(
extensions,
[extension],
build_dir=BUILD_DIR,
# generate annotated .html output files.
cache=True,
show_all_warnings=False,
annotate=True,
compiler_directives={
"language_level": "3",
Expand Down
87 changes: 84 additions & 3 deletions koerce/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,87 @@
from __future__ import annotations

from .patterns import NoMatch, Pattern
from .sugar import match, var
import sys

__all__ = ["NoMatch", "Pattern", "match", "var"]
from ._internal import *


class _Variable(Deferred):
    """Deferred object backed by a ``Var`` builder for the given name.

    Applying the unary ``~`` operator to an instance wraps it in ``Capture``.
    """

    def __init__(self, name: str):
        super().__init__(Var(name))

    def __invert__(self):
        return Capture(self)


class _Namespace:
"""Convenience class for creating patterns for various types from a module.

Useful to reduce boilerplate when creating patterns for various types from
a module.

Parameters
----------
factory
The pattern to construct with the looked up types.
module
The module object or name to look up the types.

"""

__slots__ = ("_factory", "_module")

def __init__(self, factory, module):
if isinstance(module, str):
module = sys.modules[module]
self._module = module
self._factory = factory

def __getattr__(self, name: str):
obj = getattr(self._module, name)
return self._factory(obj)


def var(name):
    """Create a ``_Variable`` for the given name."""
    variable = _Variable(name)
    return variable


def match(pat: Pattern, value: Any, context: Context = None) -> Any:
    """Apply a pattern to a value and return the outcome.

    ``pat`` is first passed through ``pattern`` and the resulting object's
    ``apply`` method is called with ``value`` and ``context``.

    Parameters
    ----------
    pat
        The pattern to match against.
    value
        The value to match.
    context
        Arbitrary mapping of values to be used while matching.

    Returns
    -------
    The matched value if the pattern matches, otherwise :obj:`NoMatch`.

    Examples
    --------
    >>> assert match(Any(), 1) == 1
    >>> assert match(1, 1) == 1
    >>> assert match(1, 2) is NoMatch
    >>> assert match(1, 1, context={"x": 1}) == 1
    >>> assert match(1, 2, context={"x": 1}) is NoMatch
    >>> assert match([1, int], [1, 2]) == [1, 2]
    >>> assert match([1, int, "a" @ InstanceOf(str)], [1, 2, "three"]) == [
    ...     1,
    ...     2,
    ...     "three",
    ... ]

    """
    return pattern(pat).apply(value, context)


# Module-level variable object created with the name "_".
_ = var("_")


# define __all__
7 changes: 7 additions & 0 deletions koerce/_internal.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from __future__ import annotations

from .annots import *
from .builders import *
from .patterns import *

# NOTE(review): presumably marks this module as the pure-Python fallback,
# as opposed to the generated Cython build of `_internal` — confirm.
compiled = False
Loading
Loading