feat: hierarchical, multi-source settings manager

DRAFT
datalad · Sep 28, 2024 · 72d33c6 · 72d33c6
1 parent b7c17d7
commit 72d33c6
Show file tree

Hide file tree

Showing 14 changed files with 989 additions and 0 deletions.
diff --git a/datasalad/__init__.py b/datasalad/__init__.py
@@ -1,3 +1,5 @@
+from __future__ import annotations
+
 from datasalad._version import __version__
 
 __all__ = [

diff --git a/datasalad/settings/__init__.py b/datasalad/settings/__init__.py
@@ -0,0 +1,53 @@
+"""Hierarchical, multi-source settings management
+
+Validation of configuration item values
+
+
+There are two ways to do validation and type conversion: on-access, or
+on-load. Doing it on-load would allow rejecting invalid configuration
+immediately, but it might spend time on items that never get accessed.
+On-access might waste cycles on repeated checks, and possibly complain later
+than useful. Here we nevertheless run a validator on-access in the default
+implementation. Particular sources may want to override this, or ensure that
+the stored value that is passed to a validator is already in the best possible
+form to make re-validation the cheapest.
+
+.. currentmodule:: datasalad.settings
+.. autosummary::
+   :toctree: generated
+
+   Settings
+   Setting
+   Source
+   CachingSource
+   Environment
+   InMemorySettings
+   Defaults
+   UnsetValue
+"""
+
+from __future__ import annotations
+
+from .defaults import Defaults
+from .env import Environment
+from .setting import (
+    Setting,
+    UnsetValue,
+)
+from .settings import Settings
+from .source import (
+    CachingSource,
+    InMemorySettings,
+    Source,
+)
+
+__all__ = [
+    'CachingSource',
+    'Defaults',
+    'Environment',
+    'InMemorySettings',
+    'Setting',
+    'Settings',
+    'Source',
+    'UnsetValue',
+]
diff --git a/datasalad/settings/defaults.py b/datasalad/settings/defaults.py
@@ -0,0 +1,31 @@
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+from datasalad.settings.source import InMemorySettings
+
+if TYPE_CHECKING:
+    from datasalad.settings.setting import Setting
+
+lgr = logging.getLogger('datasalad.settings')
+
+
+class Defaults(InMemorySettings):
+    """
+    Defaults are not loaded from any source. Clients have to set every item
+    for which a default should be known. There would typically be only one
+    instance of this class, and it is then the true source of the
+    information by itself.
+    """
+
+    def __setitem__(self, key: str, value: Setting) -> None:
+        if key in self:
+            # re-setting an existing default is an unusual event.
+            # __setitem__ does not allow for a dedicated "force" flag,
+            # so we leave a debug-level log message at least
+            lgr.debug('Resetting %r default', key)
+        super().__setitem__(key, value)
+
+    def __str__(self) -> str:
+        return 'Defaults'
diff --git a/datasalad/settings/env.py b/datasalad/settings/env.py
@@ -0,0 +1,143 @@
+from __future__ import annotations
+
+import logging
+from os import (
+    environ,
+)
+from os import (
+    name as os_name,
+)
+from typing import (
+    TYPE_CHECKING,
+    Any,
+)
+
+from datasalad.settings.setting import Setting
+from datasalad.settings.source import Source
+
+if TYPE_CHECKING:
+    from collections.abc import Collection
+
+lgr = logging.getLogger('datasalad.settings')
+
+
+class Environment(Source):
+    """Process environment settings source
+
+    This is a stateless source implementation that gets and sets items directly
+    in the process environment.
+
+    Environment variables can be filtered by declaring a name prefix. More
+    complex filter rules can be implemented by replacing the
+    :meth:`include_var()` method in a subclass.
+
+    It is possible to transform environment variable names to setting keys (and
+    vice versa), by implementing the methods :meth:`get_key_from_varname()` and
+    :meth:`get_varname_from_key()`.
+    """
+
+    is_writable = True
+
+    def __init__(
+        self,
+        *,
+        var_prefix: str | None = None,
+    ):
+        super().__init__()
+        self._var_prefix = var_prefix
+
+    def reinit(self) -> None:
+        """Does nothing"""
+
+    def load(self) -> None:
+        """Does nothing"""
+
+    def __getitem__(self, key: str) -> Setting:
+        matching = {
+            k: v
+            for k, v in environ.items()
+            # collect every included variable whose transformed name is the key
+            if self.include_var(name=k, value=v) and self.get_key_from_varname(k) == key
+        }
+        if not matching:
+            # name the missing key, so that the error message is informative
+            raise KeyError(key)
+        if len(matching) > 1:
+            lgr.warning(
+                'Ambiguous key %r matching multiple ENV vars: %r', key, list(matching)
+            )
+        # popitem() yields the most recently inserted (last matching) variable
+        _, v = matching.popitem()
+        return Setting(value=v)
+
+    def __setitem__(self, key: str, value: Setting) -> None:
+        name = self.get_varname_from_key(key)
+        environ[name] = str(value.value)
+
+    def get(self, key: str, default: Any = None) -> Setting:
+        try:
+            return self[key]
+        except KeyError:
+            if isinstance(default, Setting):
+                return default
+            return Setting(value=default)
+
+    def keys(self) -> Collection:
+        """Returns all keys that can be determined from the environment"""
+        return {
+            self.get_key_from_varname(k)
+            for k, v in environ.items()
+            if self.include_var(name=k, value=v)
+        }
+
+    def __str__(self) -> str:
+        return f'Environment[{self._var_prefix}]' if self._var_prefix else 'Environment'
+
+    def __contains__(self, key: str) -> bool:
+        # reimplemented because Python force-modifies (uppercases) environment
+        # variable names on Windows; only asking `environ` directly accounts
+        # for that. NOTE(review): unlike __getitem__, include_var() is not applied
+        return self.get_varname_from_key(key) in environ
+
+    def __repr__(self) -> str:
+        # TODO: list keys?
+        return 'Environment()'
+
+    def include_var(
+        self,
+        name: str,
+        value: str,  # noqa: ARG002 (default implementation does not need it)
+    ) -> bool:
+        """Determine whether to source a setting from an environment variable
+
+        This default implementation tests whether the name of the variable
+        starts with the ``var_prefix`` given to the constructor.
+
+        Reimplement this method to perform custom tests.
+        """
+        return name.startswith(self._var_prefix or '')
+
+    def get_key_from_varname(self, name: str) -> str:
+        """Transform an environment variable name to a setting key
+
+        This default implementation returns the unchanged
+        name as a key.
+
+        Reimplement this method and ``get_varname_from_key()`` to perform
+        custom transformations.
+        """
+        return name
+
+    def get_varname_from_key(self, key: str) -> str:
+        """Transform a setting key to an environment variable name
+
+        This default implementation only checks for illegal names (raising a
+        ``ValueError``), and uppercases the key on Windows and OS/2.
+        """
+        if '=' in key or '\0' in key:
+            msg = "illegal environment variable name (contains '=' or NUL)"
+            raise ValueError(msg)
+        if os_name in ('os2', 'nt'):
+            # https://stackoverflow.com/questions/19023238/why-python-uppercases-all-environment-variables-in-windows
+            return key.upper()
+        return key
diff --git a/datasalad/settings/setting.py b/datasalad/settings/setting.py
@@ -0,0 +1,124 @@
+from __future__ import annotations
+
+from copy import copy
+from typing import (
+    Any,
+    Callable,
+)
+
+
+class UnsetValue:
+    """Sentinel class, used uninstantiated, marking a value that has not been set"""
+
+
+class Setting:
+    """Representation of an individual setting"""
+
+    def __init__(
+        self,
+        value: Any | UnsetValue = UnsetValue,
+        *,
+        coercer: Callable | None = None,
+        lazy: bool = False,
+    ):
+        """
+        ``value`` can be of any type.  A setting instance created with
+        default :class:`UnsetValue` represents a setting with no known value.
+
+        The ``coercer`` is a callable that processes a setting value
+        on access via :attr:`value`. This callable can perform arbitrary
+        processing, including type conversion and validation.
+
+        If ``lazy`` is ``True``, ``value`` must be given as a callable that
+        requires no parameters, otherwise a ``ValueError`` is raised. It is run
+        each time :attr:`value` is accessed; its result goes to the ``coercer``.
+        """
+        if lazy and (value is UnsetValue or not callable(value)):
+            msg = 'callable required for lazy evaluation'
+            raise ValueError(msg)
+        self._value = value
+        self._coercer = coercer
+        self._lazy = lazy
+
+    @property
+    def pristine_value(self) -> Any:
+        """Original, uncoerced value"""
+        return self._value
+
+    @property
+    def value(self) -> Any:
+        """Value of a setting after coercion
+
+        For a lazy setting, accessing this property also triggers the
+        evaluation.
+        """
+        # we ignore the type error `"UnsetValue" not callable` here, because
+        # the constructor rejects non-callables, and also the (callable)
+        # UnsetValue class itself, whenever lazy evaluation is requested
+        val = self._value() if self._lazy else self._value  # type: ignore [operator]
+        if self._coercer is not None:
+            return self._coercer(val)
+        return val
+
+    @property
+    def coercer(self) -> Callable | None:
+        """``coercer`` of a setting, or ``None`` if there is none"""
+        return self._coercer
+
+    @property
+    def is_lazy(self) -> bool:
+        """Flag whether the setting evaluates on access"""
+        return self._lazy
+
+    def update(self, other: Setting) -> None:
+        """Update the item from another
+
+        This replaces the ``value`` and ``coercer`` of this setting with those
+        set in the other. In case the other's ``value`` is :class:`UnsetValue`
+        no update of the ``value`` is made. Likewise, if its ``coercer``
+        is ``None``, no update is made. Update to or from a ``lazy``
+        value will also update the ``lazy`` property accordingly.
+        """
+        if other._value is not UnsetValue:  # noqa: SLF001
+            self._value = other._value  # noqa: SLF001
+            # we also need to synchronize the lazy eval flag
+            # so we can do the right thing (TM) with the
+            # new value
+            self._lazy = other._lazy  # noqa: SLF001
+
+        if other._coercer is not None:  # noqa: SLF001
+            self._coercer = other._coercer  # noqa: SLF001
+
+    def __str__(self) -> str:
+        # wrap the value in the classname to make clear that
+        # the actual object type is different from the value
+        return f'{self.__class__.__name__}({self._value})'
+
+    def __repr__(self) -> str:
+        # report the pristine (stored) value: evaluating a lazy value or
+        # running the coercer here could be costly, have side effects, or raise
+        return (
+            f'{self.__class__.__name__}('
+            f'{self._value!r}'
+            f', coercer={self._coercer!r}'
+            f', lazy={self._lazy}'
+            ')'
+        )
+
+    def __eq__(self, item: object) -> bool:
+        """
+        This default implementation of comparing for equality only compares the
+        types, lazy flag, value, and coercer of the two items. If additional criteria
+        are relevant for derived classes :meth:`__eq__` has to be reimplemented.
+        """
+        if not isinstance(item, type(self)):
+            return False
+        return (
+            self._lazy == item._lazy
+            and self._value == item._value
+            and self._coercer == item._coercer
+        )
+
+    def copy(self) -> Setting:
+        """Return a shallow copy of the instance"""
+        return copy(self)