Skip to content

Commit

Permalink
dependency_boundary, support partial as func
Browse files Browse the repository at this point in the history
  • Loading branch information
albertz committed Dec 5, 2024
1 parent 0a54cdd commit 42c91d0
Showing 1 changed file with 47 additions and 16 deletions.
63 changes: 47 additions & 16 deletions common/helpers/dependency_boundary.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,19 @@
"""

from typing import Any, Optional, TypeVar, Callable
from sisyphus.hash import short_hash
from sisyphus.tools import extract_paths
from i6_experiments.common.utils.dump_py_code import PythonCodeDumper
from i6_experiments.common.utils.diff import collect_diffs

import os
import sys
import functools
import textwrap
import importlib.util

from sisyphus.hash import short_hash
from sisyphus.tools import extract_paths

from i6_experiments.common.utils.dump_py_code import PythonCodeDumper
from i6_experiments.common.utils.diff import collect_diffs


T = TypeVar("T")

Expand Down Expand Up @@ -59,7 +63,7 @@ def dependency_boundary(func: Callable[[], T], *, hash: Optional[str]) -> T:
cached_paths_available = _paths_available(func, obj_via_cache)
except Exception as exc:
print(
f"Dependency boundary for {func.__qualname__}:"
f"Dependency boundary for {_func_repr(func)}:"
f" error, exception {type(exc).__name__} {str(exc)!r} while loading the cache,"
" will ignore the cache"
)
Expand All @@ -68,41 +72,41 @@ def dependency_boundary(func: Callable[[], T], *, hash: Optional[str]) -> T:
cached_paths_available = False

if hash_via_user and hash_via_cache and hash_via_user == hash_via_cache and cached_paths_available:
print(f"Dependency boundary for {func.__qualname__}: using cached object with hash {hash_via_user}")
print(f"Dependency boundary for {_func_repr(func)}: using cached object with hash {hash_via_user}")
return obj_via_cache

# Either user hash invalid, or cached hash invalid, or not all paths are available, or user hash not defined.
# In any case, need to check actual function.
obj_via_func = func()
assert obj_via_func is not None # unexpected
hash_via_func = short_hash(obj_via_func)
print(f"Dependency boundary for {func.__qualname__}: hash of original object = {hash_via_func}")
print(f"Dependency boundary for {_func_repr(func)}: hash of original object = {hash_via_func}")

if not hash_via_user:
print(f"Dependency boundary for {func.__qualname__}: you should add the hash to the dependency_boundary call")
print(f"Dependency boundary for {_func_repr(func)}: you should add the hash to the dependency_boundary call")

if hash_via_user and hash_via_user != hash_via_func:
print(
f"Dependency boundary for {func.__qualname__}: error, given hash ({hash_via_user}) is invalid,"
f"Dependency boundary for {_func_repr(func)}: error, given hash ({hash_via_user}) is invalid,"
" please fix the hash given to the dependency_boundary call"
)

if hash_via_cache and hash_via_cache != hash_via_func:
print(
f"Dependency boundary for {func.__qualname__}: error, cached hash {hash_via_cache} is invalid,"
f"Dependency boundary for {_func_repr(func)}: error, cached hash {hash_via_cache} is invalid,"
" will recreate the cache"
)
hash_via_cache = None

if not hash_via_cache:
print(f"Dependency boundary for {func.__qualname__}: create or update cache {cache_fn!r}")
print(f"Dependency boundary for {_func_repr(func)}: create or update cache {cache_fn!r}")
save_obj_to_cache_file(obj_via_func, cache_filename=cache_fn)
# Do some check that the dumped object has the same hash.
obj_via_cache = load_obj_from_cache_file(cache_fn)
hash_via_cache = short_hash(obj_via_cache)
if hash_via_func != hash_via_cache:
print(
f"Dependency boundary for {func.__qualname__}: error, dumping logic stores inconsistent object,"
f"Dependency boundary for {_func_repr(func)}: error, dumping logic stores inconsistent object,"
f" dumped object hash {hash_via_cache}"
)
print("Differences:")
Expand All @@ -114,7 +118,7 @@ def dependency_boundary(func: Callable[[], T], *, hash: Optional[str]) -> T:
print("(No differences detected?)")
if hash_via_cache == hash_via_user:
print(
f"Dependency boundary for {func.__qualname__}:"
f"Dependency boundary for {_func_repr(func)}:"
f" error, user provided hash is matching to wrong cache!"
)
os.remove(cache_fn) # make sure it is not used
Expand All @@ -126,9 +130,9 @@ def get_cache_filename_for_func(func: Callable[[], T]) -> str:
"""
:return: filename of autogenerated Python file
"""
mod = sys.modules[getattr(func, "__module__")]
mod = sys.modules[_func_module(func)]
mod_dir = os.path.dirname(os.path.abspath(mod.__file__))
return f"{mod_dir}/_dependency_boundary_autogenerated_cache.{mod.__name__.split('.')[-1]}.{func.__qualname__}.py"
return f"{mod_dir}/_dependency_boundary_autogenerated_cache.{_func_name(func)}.py"


def save_obj_to_cache_file(obj: Any, *, cache_filename: str) -> None:
Expand Down Expand Up @@ -171,7 +175,34 @@ def _paths_available(func, obj: Any) -> bool:
paths = extract_paths(obj)
for path in paths:
if not path.available():
print(f"Dependency boundary for {func.__qualname__}: path {path} in cached object not available")
print(f"Dependency boundary for {_func_repr(func)}: path {path} in cached object not available")
# No need to print this for all paths, just the first one is enough.
return False
return True


def _func_name(func: Callable[[], T]) -> str:
if isinstance(func, functools.partial):
return f"{_func_name(func.func)}.partial{short_hash(func)}"
if hasattr(func, "__module__") and hasattr(func, "__qualname__"):
return f"{func.__module__.split('.')[-1]}.{func.__qualname__}"
raise ValueError(f"cannot get name for func {func!r}")


def _func_module(func: Callable[[], T]) -> str:
if isinstance(func, functools.partial):
return _func_module(func.func)
if hasattr(func, "__module__"):
return func.__module__
raise ValueError(f"cannot get module for func {func!r}")


def _func_repr(func: Callable[[], T]) -> str:
if isinstance(func, functools.partial):
args = [_func_repr(func.func)]
args.extend(repr(x) for x in func.args)
args.extend(f"{k}={v!r}" for (k, v) in func.keywords.items())
return f"partial({', '.join(args)})"
if hasattr(func, "__module__") and hasattr(func, "__qualname__"):
return f"{func.__module__.split('.')[-1]}.{func.__qualname__}"
return repr(func)

0 comments on commit 42c91d0

Please sign in to comment.