From e33718cc37374619a9ba64b1ccf0e639552ba966 Mon Sep 17 00:00:00 2001 From: Michael Hanke Date: Thu, 26 Sep 2024 22:20:47 +0200 Subject: [PATCH] feat: hierachical, multi-source settings manager The new sub-package `datasalad.settings` provides a framework for implementing a system where information items can be read from and written to any number of sources, and sources are ordered to implement a simple precedence rule. An example of such a system is the layered Git config setup, which system, global, local and other scopes. This can serve as the basis for a revamped configuration manager for DataLad. This changeset is complete with tests and documentation. Refs: https://github.com/datalad/datalad-next/issues/397 --- datasalad/__init__.py | 2 + datasalad/settings/__init__.py | 196 ++++++++++++++++ datasalad/settings/defaults.py | 54 +++++ datasalad/settings/env.py | 176 ++++++++++++++ datasalad/settings/setting.py | 124 ++++++++++ datasalad/settings/settings.py | 147 ++++++++++++ datasalad/settings/source.py | 270 ++++++++++++++++++++++ datasalad/settings/tests/__init__.py | 0 datasalad/settings/tests/test_defaults.py | 53 +++++ datasalad/settings/tests/test_env.py | 143 ++++++++++++ datasalad/settings/tests/test_setting.py | 37 +++ datasalad/settings/tests/test_settings.py | 66 ++++++ datasalad/settings/tests/test_source.py | 74 ++++++ docs/index.rst | 1 + pyproject.toml | 2 +- 15 files changed, 1344 insertions(+), 1 deletion(-) create mode 100644 datasalad/settings/__init__.py create mode 100644 datasalad/settings/defaults.py create mode 100644 datasalad/settings/env.py create mode 100644 datasalad/settings/setting.py create mode 100644 datasalad/settings/settings.py create mode 100644 datasalad/settings/source.py create mode 100644 datasalad/settings/tests/__init__.py create mode 100644 datasalad/settings/tests/test_defaults.py create mode 100644 datasalad/settings/tests/test_env.py create mode 100644 datasalad/settings/tests/test_setting.py create mode 100644 datasalad/settings/tests/test_settings.py create mode 100644 datasalad/settings/tests/test_source.py diff --git a/datasalad/__init__.py b/datasalad/__init__.py index 712b32e..1ef7381 100644 --- a/datasalad/__init__.py +++ b/datasalad/__init__.py @@ -1,3 +1,5 @@ +from __future__ import annotations + from datasalad._version import __version__ __all__ = [ diff --git a/datasalad/settings/__init__.py b/datasalad/settings/__init__.py new file mode 100644 index 0000000..009acac --- /dev/null +++ b/datasalad/settings/__init__.py @@ -0,0 +1,196 @@ +"""Hierarchical, multi-source settings management + +This module provides a framework for implementing a system where +information items can be read from and written to any number of sources, +and sources are ordered to implement a simple precedence rule. +An example of such a system is the layered Git config setup, which +system, global, local and other scopes. + +The framework is built on three main classes: + +- :class:`Setting`: an individual information item +- :class:`Source`: base class for a settings provider +- :class:`Settings`: the top-level API for a multi-source settings manager + + +Basic usage +----------- + +To establish a settings manager instance one needs to create an instance +of :class:`Settings` and supply it with any instances of sources that +the manager should consider. Importantly, the order in which the sources +are declared also represents the precedence rule. Sources declared first +take precedence over sources declared later. 
+ +>>> from datasalad.settings import Settings, Environment, Defaults +>>> defaults = Defaults() +>>> settings = Settings( +... { +... 'env': Environment(var_prefix='myapp_'), +... # any number of additional sources could be here +... 'defaults': defaults, +... } +... ) + +It often makes sense to use a dedicated instance of :class:`Defaults` (a +variant of :class:`InMemory`). It can be populated on import +to collect all default settings of an application, and simplifies +implementations, because all possible settings are known to this +instance. + +>>> defaults['myconf'] = Setting('default-value') +>>> settings['myconf'].value +'default-value' + +It is also possible to equip a setting with a callable that performs +type-coercion or validation: + +>>> defaults['myapp_conf'] = Setting('5', coercer=int) +>>> settings['myapp_conf'].value +5 + +This coercer is inherited, if not overwritten, even when the value +with the highest precedence is retrieved from a different source, +which does not provide a coercer itself. + +>>> # set value for `myapp_conf` in the `env` source +>>> settings.sources['env']['myapp_conf'] = Setting('123') +>>> settings['myapp_conf'].value +123 + + +Advanced usage +-------------- + +The usage patterns already shown above are often all that is needed. +However, the framework is more flexible and allows for implementing +more flexible solutions. + +Setting keys need not be of type ``str``, but can be any hashable type, +and need not necessarily be homogeneous across (or even within) individual +sources, as long as their are hashable + +>>> defaults[(0, 1, 2)] = Settings(object) + +There is support for multiple values registered under a single key, even within +a single source. The standard accessor methods (:meth:`__getitem__`, and +:meth:`~Settings.get`), however, always return a single item only. In case of +multiple available values, they return an item that is the composition of item +properties with the highest precedence. In contrast, the +:meth:`~Settings.getall`) method return all setting items across all sources as +a ``tuple``. + +The :class:`Settings` class does not support setting values. Instead, the +desired source has to beselected explicitly via the :meth:`~Settings.sources` +method (as shown in the example above). This allows for individual sources to +offer an API and behavior that is optimally tuned for a particular source type, +rather than be constrained by a common denominator across all possible source +types. Sources are registered and selected via a unique, use case specific +identifier. This should make clear what kind of source is being written to in +application code. + +It is also possible to use this framework with custom :class:`Setting` +subclasses, possibly adding properties or additional methods. The +:class:`Settings` class variable :attr:`~Settings.item_type` can take +a type that is used for returning default values. + + +Implement custom sources +------------------------ + +Custom sources can be implemented by subclassing +:class:`~datasalad.settings.Source`, and implementing methods for its +``dict``-like interface. Different (abstract) base classes are provided for +common use cases + +:class:`~datasalad.settings.Source` is the most basic class, suitable +for any read-only source. 
It requires implementing the following methods +(see their documentation for details): + +- :meth:`~datasalad.settings.Source.reinit` +- :meth:`~datasalad.settings.Source.load` +- :meth:`~datasalad.settings.Source.__getitem__` +- :meth:`~datasalad.settings.Source.keys` + +:class:`~datasalad.settings.WritableSource` extends the interface +with methods for modification of a writable source: + +- :meth:`~datasalad.settings.Source.__setitem__` +- :meth:`~datasalad.settings.Source.__delitem__` + +The property :meth:`~datasalad.settings.WritableSource.is_writable` returns +``True`` by default. It can be reimplemented to report a particular source +instance as read-only, even if it is theoretically writable, for example due to +insufficient permissions. + +:class:`~datasalad.settings.CachingSource` is a writable source implementation +with an in-memory cache. It only requires implementing +:meth:`~datasalad.settings.Source.load` when set items shall not be written to +the underlying source, but are only cached in memory. Otherwise, all standard +getters and setters need to be wrapped accordingly. + +Lastly, :class:`~datasalad.settings.InMemory` is a readily usable, +"source-less" items source, which is also the basis for +:class:`~datasalad.settings.Defaults`. + + +Notes on type-coercion and validation +------------------------------------- + +Type-coercion and validation is solely done on access of a +:class:`~datasalad.settings.setting.Setting` instance's +:attr:`~datasalad.settings.setting.Setting.value` property. There is no +on-load validation to reject invalid configuration immediately. This approach +is taken to avoid spending time on items that might never actually get +accessed. + +There is also no generic on-write validation. This has to be done for each +source implementation separately and explicitly. There is no assumption of +homogeneity regarding what type and values are acceptable across sources. + + +API reference +------------- + +.. currentmodule:: datasalad.settings +.. autosummary:: + :toctree: generated + + Settings + Setting + Source + WritableSource + CachingSource + Environment + InMemory + Defaults + UnsetValue +""" + +from __future__ import annotations + +from .defaults import Defaults +from .env import Environment +from .setting import ( + Setting, + UnsetValue, +) +from .settings import Settings +from .source import ( + CachingSource, + InMemory, + Source, + WritableSource, +) + +__all__ = [ + 'CachingSource', + 'Defaults', + 'Environment', + 'InMemory', + 'Setting', + 'Settings', + 'Source', + 'WritableSource', + 'UnsetValue', +] diff --git a/datasalad/settings/defaults.py b/datasalad/settings/defaults.py new file mode 100644 index 0000000..a55f8e4 --- /dev/null +++ b/datasalad/settings/defaults.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import logging +from typing import ( + TYPE_CHECKING, + Hashable, +) + +from datasalad.settings.source import InMemory + +if TYPE_CHECKING: + from datasalad.settings.setting import Setting + +lgr = logging.getLogger('datasalad.settings') + + +class Defaults(InMemory): + """Source for collecting implementation defaults of settings + + Such defaults are not loaded from any source. Clients have to set any items + they want to see a default be known for. There would typically be only one + instance of this class, and it would then be the true source of this + information by itself. + + The difference to :class:`InMemory` is minimal. 
It is limited + to emitting a debug-level log message when setting the value of an item + that has already been set before. + + >>> from datasalad.settings import Defaults, InMemory, Setting, Settings + >>> defaults = Defaults() + >>> defaults['myswitch'] = Setting( + ... 'on', coercer=lambda x: {'on': True, 'off': False}[x] + ... ) + >>> defaults['myswitch'].value + True + >>> settings = Settings({'overrides': InMemory(), 'defaults': defaults}) + >>> settings['myswitch'].value + True + >>> settings.sources['overrides']['myswitch'] = Setting('off') + >>> settings['myswitch'].value + False + >>> settings.sources['overrides']['myswitch'] = Setting('broken') + >>> settings['myswitch'].value + Traceback (most recent call last): + KeyError: 'broken' + """ + + def __setitem__(self, key: Hashable, value: Setting) -> None: + if key in self: + # resetting is something that is an unusual event. + # __setitem__ does not allow for a dedicated "force" flag, + # so we leave a message at least + lgr.debug('Resetting %r default', key) + super().__setitem__(key, value) diff --git a/datasalad/settings/env.py b/datasalad/settings/env.py new file mode 100644 index 0000000..f13f680 --- /dev/null +++ b/datasalad/settings/env.py @@ -0,0 +1,176 @@ +from __future__ import annotations + +import logging +from os import ( + environ, +) +from os import ( + name as os_name, +) +from typing import ( + TYPE_CHECKING, + Any, + Hashable, +) + +from datasalad.settings.setting import Setting +from datasalad.settings.source import WritableSource + +if TYPE_CHECKING: + from collections.abc import Collection + +lgr = logging.getLogger('datasalad.settings') + + +class Environment(WritableSource): + """Process environment source + + This is a stateless source implementation that gets and sets items directly + in the process environment. + + Environment variables to be read can be filtered by declaring a name + prefix. More complex filter rules can be implemented by replacing the + :meth:`include_var()` method in a subclass. + + It is possible to transform an environment variable name to a setting key + (and vice versa), by implementing the methods + :meth:`get_key_from_varname()` and :meth:`get_varname_from_key()`. + + .. attention:: + + Due to peculiarities of the behavior of Python's ``os.environ`` on the + windows platform (and ``os2``), all variable names are converted to + upper case, and are effectively treated as case-insensitive, on that + platform. For this default implementation this implies that + the :meth:`~Environment.keys` method can only ever return uppercase keys. + Reimplement :meth:`~Environment.get_key_from_varname` to change this. + Retrieving a value for an individual key will nevertheless work for the + default implementation even with a lowercase or mixed case key. + """ + + def __init__( + self, + *, + var_prefix: str | None = None, + ): + super().__init__() + self._var_prefix = ( + var_prefix.upper() + if var_prefix is not None and os_name in ('os2', 'nt') + else var_prefix + ) + + def reinit(self): + """Does nothing""" + + def load(self) -> None: + """Does nothing + + All accessors inspect the process environment directly. 
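
        A brief illustration of this statelessness (``MYAPP_DEMO`` is a
        hypothetical variable, used here only for demonstration): a value
        placed in the process environment is visible immediately, with no
        call to ``load()`` in between.

        >>> from os import environ
        >>> env = Environment(var_prefix='MYAPP_')
        >>> environ['MYAPP_DEMO'] = 'on'
        >>> env['MYAPP_DEMO'].value
        'on'
        >>> del environ['MYAPP_DEMO']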
+ """ + + def __getitem__(self, key: Hashable) -> Setting: + return Setting(value=environ[self.get_varname_from_key(key)]) + + def __setitem__(self, key: Hashable, value: Setting) -> None: + name = self.get_varname_from_key(key) + environ[name] = str(value.value) + + def __delitem__(self, key: Hashable) -> None: + name = self.get_varname_from_key(key) + del environ[name] + + def get(self, key, default: Any = None) -> Setting: + try: + return self[key] + except KeyError: + if isinstance(default, Setting): + return default + return Setting(value=default) + + def keys(self) -> Collection: + """Returns all keys that can be determined from the environment + + .. attention:: + + Due to peculiarities of the behavior of Python's ``os.environ`` on + the windows platform (and ``os2``), this method can only report + uppercase keys with the default implementation. Reimplement + :meth:`get_key_from_varname()` to modify this behavior. + """ + varmap = { + k: self.get_key_from_varname(k) + for k, v in environ.items() + if self.include_var(name=k, value=v) + } + _keys = set(varmap.values()) + if len(_keys) < len(varmap): + allkeys = list(varmap.values()) + lgr.warning( + 'Ambiguous ENV variables map on identical keys: %r', + { + key: [k for k in sorted(varmap) if varmap[k] == key] + for key in _keys + if allkeys.count(key) > 1 + }, + ) + return _keys + + def __str__(self): + return f'Environment[{self._var_prefix}]' if self._var_prefix else 'Environment' + + def __contains__(self, key: Hashable) -> bool: + # we only need to reimplement this due to Python's behavior to + # forece-modify environment variable names on Windows. Only + # talking directly for environ accounts for that + return self.get_varname_from_key(key) in environ + + def __repr__(self): + # TODO: list keys? + return 'Environment()' + + def include_var( + self, + name: str, + value: str, # noqa: ARG002 (default implementation does not need it) + ) -> bool: + """Determine whether to source a setting from an environment variable + + This default implementation tests whether the name of the variable + starts with the ``var_prefix`` given to the constructor. + + Reimplement this method to perform custom tests. + """ + return name.startswith(self._var_prefix or '') + + def get_key_from_varname(self, name: str) -> Hashable: + """Transform an environment variable name to a setting key + + This default implementation returns the unchanged name as a key. + + Reimplement this method and ``get_varname_from_key()`` to perform + custom transformations. + """ + return name + + def get_varname_from_key(self, key: Hashable) -> str: + """Transform a setting key to an environment variable name + + This default implementation only checks for illegal names and + raises a ``ValueError``. Otherwise it returns the unchanged key. + + .. attention:: + + Due to peculiarities of the behavior of Python's ``os.environ`` + on the windows platform, all variable names are converted to + upper case, and are effectively treated as case-insensitive, + on that platform. 
+ """ + varname = str(key) + if '=' in varname or '\0' in varname: + msg = "illegal environment variable name (contains '=' or NUL)" + raise ValueError(msg) + if os_name in ('os2', 'nt'): + # https://stackoverflow.com/questions/19023238/why-python-uppercases-all-environment-variables-in-windows + return varname.upper() + return varname diff --git a/datasalad/settings/setting.py b/datasalad/settings/setting.py new file mode 100644 index 0000000..a468c3b --- /dev/null +++ b/datasalad/settings/setting.py @@ -0,0 +1,124 @@ +from __future__ import annotations + +from copy import copy +from typing import ( + Any, + Callable, +) + + +class UnsetValue: + """Placeholder type to indicate a value that has not been set""" + + +class Setting: + """Representation of an individual setting""" + + def __init__( + self, + value: Any | UnsetValue = UnsetValue, + *, + coercer: Callable | None = None, + lazy: bool = False, + ): + """ + ``value`` can be of any type. A setting instance created with + default :class:`UnsetValue` represents a setting with no known value. + + The ``coercer`` is a callable that processes a setting value + on access via :attr:`value`. This callable can perform arbitrary + processing, including type conversion and validation. + + If ``lazy`` is ``True``, ``value`` must be a callable that requires + no parameters. This callable will be executed each time :attr:`value` + is accessed, and its return value is passed to the ``coercer``. + """ + if lazy and not callable(value): + msg = 'callable required for lazy evaluation' + raise ValueError(msg) + self._value = value + self._coercer = coercer + self._lazy = lazy + + @property + def pristine_value(self) -> Any: + """Original, uncoerced value""" + return self._value + + @property + def value(self) -> Any: + """Value of a setting after coercion + + For a lazy setting, accessing this property also triggers the + evaluation. + """ + # we ignore the type error here + # "error: "UnsetValue" not callable" + # because we rule this out in the constructor + val = self._value() if self._lazy else self._value # type: ignore [operator] + if self._coercer: + return self._coercer(val) + return val + + @property + def coercer(self) -> Callable | None: + """``coercer`` of a setting, or ``None`` if there is none""" + return self._coercer + + @property + def is_lazy(self) -> bool: + """Flag whether the setting evaluates on access""" + return self._lazy + + def update(self, other: Setting) -> None: + """Update the item from another + + This replaces any ``value`` or ``coercer`` set in the other + setting. If case the other's ``value`` is :class:`UnsetValue` + no update of the ``value`` is made. Likewise, if ``coercer`` + is ``None``, no update is made. Update to or from a ``lazy`` + value will also update the ``lazy`` property accordingly. 
+ """ + if other._value is not UnsetValue: # noqa: SLF001 + self._value = other._value # noqa: SLF001 + # we also need to synchronize the lazy eval flag + # so we can do the right thing (TM) with the + # new value + self._lazy = other._lazy # noqa: SLF001 + + if other._coercer: # noqa: SLF001 + self._coercer = other._coercer # noqa: SLF001 + + def __str__(self) -> str: + # wrap the value in the classname to make clear that + # the actual object type is different from the value + return f'{self.__class__.__name__}({self._value})' + + def __repr__(self) -> str: + # wrap the value in the classname to make clear that + # the actual object type is different from the value + return ( + f'{self.__class__.__name__}(' + f'{self._value!r}' + f', coercer={self._coercer!r}' + f', lazy={self._lazy}' + ')' + ) + + def __eq__(self, item: object) -> bool: + """ + This default implementation of comparing for equality only compare the + types, value, and coercer of the two items. If additional criteria are + relevant for derived classes :meth:`__eq__` has to be reimplemented. + """ + if not isinstance(item, type(self)): + return False + return ( + self._lazy == item._lazy + and self._value == item._value + and self._coercer == item._coercer + ) + + def copy(self): + """Return a shallow copy of the instance""" + return copy(self) diff --git a/datasalad/settings/settings.py b/datasalad/settings/settings.py new file mode 100644 index 0000000..39d2004 --- /dev/null +++ b/datasalad/settings/settings.py @@ -0,0 +1,147 @@ +from __future__ import annotations + +from copy import copy +from itertools import chain +from types import MappingProxyType +from typing import ( + TYPE_CHECKING, + Any, + Hashable, +) + +from datasalad.settings.setting import Setting + +if TYPE_CHECKING: + from datasalad.settings import Source + + +class Settings: + """Query across different sources of settings + + This class implements key parts of the standard ``dict`` interface + (with some additions). + + An instance is initialized with an ordered mapping of source identifiers + to :class:`~datasalad.settings.Source` instances. The order reflects + the precedence rule with which settings and their properties are selected + for reporting across sources. Source declared earlier take precedence over + sources declared later. + + When an individual setting is requested via the ``__getitem__()`` method, a + "flattened" representation of the item across all sources is determined and + returned. This is not necessarily a setting that exists in this exact form + at any source. Instead, for each setting property the value from the source + with the highest precedence is looked up and used for the return item. + + In practice, this means that, for example, a ``coercer`` can come from a + lower-precedence source and the setting's ``value`` from a different + higher-precedence source. + + See :meth:`~Settings.getall` for an alternative access method. + """ + + item_type: type = Setting + """Type to wrap default value in for :meth:`get()` and + :meth:`getall()`.""" + + def __init__( + self, + sources: dict[str, Source], + ): + # we keep the sources strictly separate. + # the order here matters and represents the + # precedence rule + self._sources = sources + + @property + def sources(self) -> MappingProxyType: + """Read-only mapping of source identifiers to source instance + + This property is used to select individual sources for source-specific + operations, such as writing a setting to an underlying source. 
+ """ + return MappingProxyType(self._sources) + + def __len__(self): + return len(self.keys()) + + def __getitem__(self, key: Hashable) -> Setting: + """Some""" + # this will become the return item + item: Setting | None = None + # now go from the back + # - start with the first Setting class instance we get + # - update a copy of this particular instance with all information + # from sources with higher priority and flatten it across + # sources + for s in reversed(self._sources.values()): + update_item = None + try: + update_item = s[key] + except KeyError: + # source does not have it, proceed + continue + if item is None: + # in-place modification and destroy the original + # item's integrity + item = copy(update_item) + continue + # we run the update() method of the first item we ever found. + # this will practically make the type produced by the lowest + # precedence source define the behavior. This is typically + # some kind of implementation default + item.update(update_item) + if item is None: + # there was nothing + raise KeyError + return item + + def __contains__(self, key: Hashable): + return any(key in s for s in self._sources.values()) + + def keys(self) -> set[Hashable]: + """Returns all setting keys known across all sources""" + return set(chain.from_iterable(s.keys() for s in self._sources.values())) + + def get(self, key: Hashable, default: Any = None) -> Setting: + """Return a particular setting identified by its key, or a default + + The composition of the returned setting follows the same rules + as the access via ``__getitem__``. + + When the ``default`` value is not given as an instance of + :class:`~datasalad.settings.Setting`, it will be + automatically wrapped into the one given by :attr:`Settings.item_type`. + """ + try: + return self[key] + except KeyError: + return self._get_default_setting(default) + + def getall( + self, + key: Hashable, + default: Any = None, + ) -> tuple[Setting, ...]: + """Returns a tuple of all known setting instances for a key across sources + + If no source has any information for a given key, a length-one tuple + with a :class:`~datasalad.settings.Setting` instance for the given + ``default`` value is returned. + """ + # no flattening, get all from all + items: tuple[Setting, ...] = () + for s in reversed(self._sources.values()): + if key in s: + # we checked before, no need to handle a default here + items = ( + (*items, *s.getall(key)) + if hasattr(s, 'getall') + else (*items, s[key]) + ) + return items if items else (self._get_default_setting(default),) + + def _get_default_setting(self, default: Any) -> Setting: + if isinstance(default, Setting): + return default + return self.item_type(value=default) diff --git a/datasalad/settings/source.py b/datasalad/settings/source.py new file mode 100644 index 0000000..104bc34 --- /dev/null +++ b/datasalad/settings/source.py @@ -0,0 +1,270 @@ +from __future__ import annotations + +from abc import ( + ABC, + abstractmethod, +) +from typing import ( + TYPE_CHECKING, + Any, + Generator, + Hashable, +) + +from datasalad.settings.setting import Setting + +if TYPE_CHECKING: + from collections.abc import Collection + + +class Source(ABC): + """Abstract base class a settings source. + + This class offers a ``dict``-like interface. Individual settings can be + retrieved via the standard accessor methods :meth:`~Source.__getitem__`, + and :meth:`~Source.get`. 
+ + A number of methods have to be implemented for any concrete source + (see their documentation for details on the expected behavior): + + - :meth:`Source.reinit` + - :meth:`Source.reload` + - :meth:`Source.keys` + - :meth:`Source.__getitem__` + + This class is in itself a suitable base for a generic read-only setting + source. For other scenarios alternative base class are also available: + + - :class:`~datasalad.settings.WritableSource` + - :class:`~datasalad.settings.CachingSource` + - :class:`~datasalad.settings.InMemory` + """ + + item_type: type = Setting + """Type to wrap default value in for :meth:`get()`""" + + @abstractmethod + def load(self) -> None: + """Implements loading items from the underlying source. + + It is expected that after calling this method, an instance of this + source reports on items according to the latest state of the source. + + No side-effects are implied. Particular implementations may + even choose to have this method be a no-op. + + Importantly, calling this method does not imply a call to + :meth:`~Source.reinit`. If a from-scratch reload is desired, + :meth:`~Source.reinit` must be called explicitly. + """ + + @abstractmethod + def reinit(self) -> None: + """Re-initialize source instance + + Re-initializing is resetting any state of the source interface instance + such that a subsequent :meth:`~Source.load` fully synchronizes the + reporting of settings with the state of the underlying source. Calling + this method does *not* imply resetting the underlying settings source + (e.g., removing all settings from the source). + """ + + @abstractmethod + def __getitem__(self, key: Hashable) -> Setting: + """ """ + + @abstractmethod + def keys(self) -> Collection: + """Returns all setting keys known to a source""" + + @property + def is_writable(self) -> bool: + """Flag whether configuration item values can be set at the source + + This default implementation returns ```False``. + """ + return False + + def get(self, key: Hashable, default: Any = None) -> Setting: + """Return a particular setting identified by its key, or a default + + This method calls ``__getitem__``, and returns the default on + a ``KeyError`` exception. + + When the ``default`` value is not given as an instance of + :class:`~datasalad.settings.Setting`, it will be + automatically wrapped into the one given by :attr:`Source.item_type`. + """ + try: + val = self[key] + except KeyError: + return self._get_default_setting(default) + if isinstance(val, tuple): + return val[-1] + return val + + def getall(self, key: Hashable, default: Any = None) -> tuple[Setting, ...]: + """Return all individual settings registered for a key + + This default implementation returns a length-one tuple with the + return value of :meth:`~Source.get`. + + Derived classes for source that can represent multiple values for + a single key should reimplement this method appropriately. + """ + return (self.get(key, default),) + + def __len__(self) -> int: + return len(self.keys()) + + def __contains__(self, key: Hashable) -> bool: + return key in self.keys() + + def __iter__(self) -> Generator[Hashable]: + yield from self.keys() + + def _get_default_setting(self, default: Any) -> Setting: + if isinstance(default, Setting): + return default + return self.item_type(value=default) + + +class WritableSource(Source): + """Extends ``Source`` with a setter interface + + By default, the :attr:`is_writable` property of a class instance is + ``True``. 
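
    A sketch of reporting a particular instance as read-only (the subclass
    and its ``readonly`` flag are hypothetical; :class:`InMemory` merely
    serves as a convenient, fully implemented base):

    >>> from datasalad.settings import InMemory, Setting
    >>> class GuardedMemory(InMemory):
    ...     def __init__(self, *, readonly=False):
    ...         super().__init__()
    ...         self._readonly = readonly
    ...     @property
    ...     def is_writable(self):
    ...         return not self._readonly
    >>> GuardedMemory(readonly=True).is_writable
    False
    >>> GuardedMemory(readonly=True)['mykey'] = Setting('some')
    Traceback (most recent call last):
    NotImplementedError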
+ """ + + @abstractmethod + def __setitem__(self, key: Hashable, value: Setting) -> None: + """ """ + + @abstractmethod + def __delitem__(self, key: Hashable): + """ """ + + @property + def is_writable(self) -> bool: + """Flag whether configuration item values can be set at the source + + This default implementation returns ```True``. + """ + return True + + +class CachingSource(WritableSource): + """Extends ``WritableSource`` with an in-memory cache + + On first access of any setting the ``reinit()`` and ``load()`` methods of a + subclass are called. + + On load, an implementation can use the standard ``__setitem__()`` method of + this class directly to populate the cache. Any subsequent read access is + reported directly from this cache. + + Subclasses should generally reimplement ``__setitem__()`` to call the base + class implementation in addition to setting a value in the actual source. + """ + + def __init__(self) -> None: + super().__init__() + self.__items: dict[Hashable, Setting | tuple[Setting, ...]] | None = None + + @property + def _items(self) -> dict[Hashable, Setting | tuple[Setting, ...]]: + if self.__items is None: + self.reinit() + self.load() + if TYPE_CHECKING: + assert self.__items is not None + return self.__items + + def reinit(self) -> None: + # particular implementations may not use this facility, + # but it is provided as a convenience. Maybe factor + # it out into a dedicated subclass even. + self.__items = {} + + def __len__(self) -> int: + return len(self._items) + + def __getitem__(self, key: Hashable) -> Setting: + val = self._items[key] + if isinstance(val, tuple): + return val[-1] + return val + + def __setitem__(self, key: Hashable, value: Setting) -> None: + if not self.is_writable: + raise NotImplementedError + self._items[key] = value + + def __delitem__(self, key: Hashable): + del self._items[key] + + def __contains__(self, key: Hashable) -> bool: + return key in self._items + + def keys(self) -> Collection[Hashable]: + return self._items.keys() + + def add(self, key: Hashable, value: Setting) -> None: + if not self.is_writable: + raise NotImplementedError + if key in self: + self._items[key] = (*self.getall(key), value) + else: + self._items[key] = value + + def __repr__(self) -> str: + return f'{self.__class__.__name__}({self._items!r})' + + def __str__(self) -> str: + return ''.join( + ( + f'{self.__class__.__name__}(', + ','.join( + # we use the pristine value here to avoid issues + # with validation/coercion failures when rendering + # sources + f'{k}=({",".join(repr(val.pristine_value) for val in v)})' + if isinstance(v, tuple) + else f'{k}={v.pristine_value!r}' + for k, v in self._items.items() + ), + ')', + ) + ) + + def getall(self, key: Hashable, default: Any = None) -> tuple[Setting, ...]: + try: + val = self._items[key] + except KeyError: + return (self._get_default_setting(default),) + if isinstance(val, tuple): + return val + return (val,) + + # TODO: __iter__ + + +class InMemory(CachingSource): + """Extends ``CachingSource`` with a no-op ``load()`` implementation + + This class provides a directly usable implementation of a setting source + that manages all settings in memory only, and does not load information + from any actual source. + """ + + is_writable = True + + def load(self) -> None: + """Does nothing + + An instance of :class:`InMemory` has no underlying source + to load from. 
+ """ + + def __str__(self): + return f'{self.__class__.__name__}' diff --git a/datasalad/settings/tests/__init__.py b/datasalad/settings/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/datasalad/settings/tests/test_defaults.py b/datasalad/settings/tests/test_defaults.py new file mode 100644 index 0000000..6eb4918 --- /dev/null +++ b/datasalad/settings/tests/test_defaults.py @@ -0,0 +1,53 @@ +import logging +import sys +from os.path import dirname + +from ..defaults import Defaults +from ..setting import Setting + + +def test_defaultsrc(caplog): + d = Defaults() + assert str(d) == 'Defaults' + + # smoke test NO-OP method + d.load() + + target_key = 'some.key' + orig_value = 'mike' + updated_value = 'allnew' + + assert target_key not in d + assert d.get(target_key, 'default').value == 'default' + assert d.get(target_key, Setting('default2')).value == 'default2' + d[target_key] = Setting(orig_value) + assert d[target_key].value == orig_value + assert 'Resetting' not in caplog.text + with caplog.at_level(logging.DEBUG): + # we get a debug message when a default is reset + d[target_key] = Setting(updated_value) + assert 'Resetting' in caplog.text + assert d[target_key].value == updated_value + del d[target_key] + assert target_key not in d + + d[target_key] = Setting(orig_value) + assert len(d) == 1 + d.reinit() + assert target_key not in d + assert len(d) == 0 + + +def test_defaultsrc_dynamic(): + d = Defaults() + target_key = 'some.key' + dynset = Setting( + lambda: sys.executable, + coercer=dirname, + lazy=True, + ) + assert dynset.value == dirname(sys.executable) + + d[target_key] = dynset + item = d[target_key] + assert item.value == dirname(sys.executable) diff --git a/datasalad/settings/tests/test_env.py b/datasalad/settings/tests/test_env.py new file mode 100644 index 0000000..d9647ed --- /dev/null +++ b/datasalad/settings/tests/test_env.py @@ -0,0 +1,143 @@ +from os import ( + environ, +) +from os import ( + name as os_name, +) +from typing import Hashable +from unittest.mock import patch + +import pytest + +from ..env import Environment +from ..setting import Setting + + +def test_envsrc(): + assert str(Environment()) == 'Environment' + assert str(Environment(var_prefix='DATALAD_')) == 'Environment[DATALAD_]' + assert repr(Environment()) == 'Environment()' + + # smoke test NO-OP methods + env = Environment() + env.reinit() + env.load() + + +def test_envsrc_illegal_keys(): + env = Environment() + # prevent any accidental modification + with patch.dict(environ, {}): + with pytest.raises(ValueError, match='illegal'): + env['mustnothave=char'] = 'some' + with pytest.raises(ValueError, match='illegal'): + env['mustnothave\0char'] = 'some' + + +# traditional datalad name transformation approach +class DataladLikeEnvironment(Environment): + def get_key_from_varname(self, name: str) -> Hashable: + return name.replace('__', '-').replace('_', '.').casefold() + + def get_varname_from_key(self, key: Hashable) -> str: + # note that this is not actually a real inverse transform + return str(key).replace('.', '_').replace('-', '__').upper() + + +def test_envsrc_get(monkeypatch): + target_key = 'datalad.chunky-monkey.feedback' + target_value = 'ohmnomnom' + absurd_must_be_absent_key = 'nobody.would.use.such.a.key' + with monkeypatch.context() as m: + m.setenv('DATALAD_CHUNKY__MONKEY_FEEDBACK', 'ohmnomnom') + env = DataladLikeEnvironment(var_prefix='DATALAD_') + assert target_key in env.keys() # noqa: SIM118 + assert target_key in env + assert env.get(target_key).value == 
target_value + # default is wrapped into Setting if needed + assert env.get(absurd_must_be_absent_key, target_value).value is target_value + assert ( + env.get(absurd_must_be_absent_key, Setting(value=target_value)).value + is target_value + ) + # assert env.getvalue(target_key) == target_value + # assert env.getvalue(absurd_must_be_absent_key) is None + assert len(env) + + +def test_envsrc_ambiguous_keys(monkeypatch, caplog): + target_key = 'datalad.chunky-monkey.feedback' + target_value = 'ohmnomnom' + with monkeypatch.context() as m: + # define two different setting that map on the same key + # with datalad's mapping rules + m.setenv('DATALAD_CHUNKY__monkey_FEEDBACK', 'würg') + m.setenv('DATALAD_CHUNKY__MONKEY_FEEDBACK', 'ohmnomnom') + env = DataladLikeEnvironment(var_prefix='DATALAD_') + # we still get the key's value + assert env[target_key].value == target_value + # negative test to make the next one count + assert 'map on identical' not in caplog.text + assert env.keys() == {target_key} + # we saw a log message complaining about the ambiguous + # key + if os_name not in ('os2', 'nt'): + # not testing on platforms where Python handles vars + # in case insensitive manner + assert ( + 'Ambiguous ENV variables map on identical keys: ' + "{'datalad.chunky-monkey.feedback': " + "['DATALAD_CHUNKY__MONKEY_FEEDBACK', " + "'DATALAD_CHUNKY__monkey_FEEDBACK']}" + ) in caplog.text + + +def test_envsrc_set(): + env = Environment() + + with patch.dict(environ, {}): + env['some.key'] = Setting(value='mike') + assert 'some.key' in env + + # the instance is stateless, restoring the original + # env removes any knowledge of the key + assert 'some.key' not in env + + +def test_envsrc_del(): + env = Environment() + + with patch.dict(environ, {}): + env['some.key'] = Setting(value='mike') + assert 'some.key' in env + del env['some.key'] + assert 'some.key' not in env + + # the instance is stateless, restoring the original + # env removes any knowledge of the key + assert 'some.key' not in env + + +def test_envsrc_set_matching_transformed(): + env = DataladLikeEnvironment(var_prefix='DATALAD_') + env_name = 'DATALAD_SOME_KEY' + orig_value = 'mike' + updated_value = 'allnew' + + with patch.dict(environ, {env_name: orig_value}): + assert 'datalad.some.key' in env + assert env['datalad.some.key'].value == orig_value + env['datalad.some.key'] = Setting(updated_value) + # the new value is set for the inverse-transformed + # variable name + assert environ.get(env_name) == updated_value + + +def test_envsrc_lowercase_keys(): + with patch.dict(environ, {}): + env = Environment(var_prefix='myapp_') + env['myapp_conf'] = Setting(123, coercer=str) + assert ( + env.keys() == {'MYAPP_CONF'} if os_name in ('os2', 'nt') else {'myapp_conf'} + ) + assert env['myapp_conf'].value == '123' diff --git a/datasalad/settings/tests/test_setting.py b/datasalad/settings/tests/test_setting.py new file mode 100644 index 0000000..cc4d3f8 --- /dev/null +++ b/datasalad/settings/tests/test_setting.py @@ -0,0 +1,37 @@ +import pytest + +from ..setting import Setting + + +def test_setting(): + with pytest.raises(ValueError, match='callable required'): + Setting(5, lazy=True) + + test_val = 5 + item = Setting(lambda: test_val, lazy=True) + assert item.is_lazy is True + assert item.value == test_val + + assert 'lambda' in str(item) + + test_val = 4 + item.update(Setting(str(test_val), coercer=int)) + assert item.is_lazy is False + assert item.value == test_val + + item.update(Setting(coercer=float)) + assert item.value == float(test_val) + + 
+def test_setting_derived_copy(): + class MySetting(Setting): + def __init__(self, allnew: str): + self.allnew = allnew + + target = 'dummy' + ms = MySetting(target) + ms_c = ms.copy() + assert ms_c.allnew == target + + # __eq__ considers the derived type and rejects + assert ms != Setting(target) diff --git a/datasalad/settings/tests/test_settings.py b/datasalad/settings/tests/test_settings.py new file mode 100644 index 0000000..29c606a --- /dev/null +++ b/datasalad/settings/tests/test_settings.py @@ -0,0 +1,66 @@ +import sys + +import pytest + +from ..defaults import Defaults +from ..setting import Setting +from ..settings import Settings +from ..source import InMemory + + +def test_settings(): + man = Settings( + { + 'mem1': InMemory(), + 'mem2': InMemory(), + 'defaults': Defaults(), + } + ) + + assert list(man.sources.keys()) == ['mem1', 'mem2', 'defaults'] + assert len(man) == 0 + target_key = 'some.key' + assert target_key not in man + with pytest.raises(KeyError): + man[target_key] + + man.sources['defaults'][target_key] = Setting('0', coercer=int) + assert man[target_key].value == 0 + + man.sources['mem2'][target_key] = Setting('1', coercer=float) + man.sources['mem1'][target_key] = Setting('2') + + coerced_target = 2.0 + item = man[target_key] + assert item.value == coerced_target + assert item.coercer == float + + vals = man.getall(target_key) + assert isinstance(vals, tuple) + # one per source here + # TODO: enhance test case to have a multi-value setting in a single source + nsources = 3 + assert len(vals) == nsources + assert [v.value for v in vals] == [0, 1.0, '2'] + + vals = man.getall('idonotexist') + assert isinstance(vals, tuple) + assert vals == (Setting(None),) + + vals = man.getall('idonotexist', Setting(True)) + assert isinstance(vals, tuple) + assert vals == (Setting(True),) + + assert man.get('idonotexist').value is None + assert ( + man.get( + 'idonotexist', + # makes little actual sense, but exercises a lazy + # default setting + Setting( + lambda: sys.executable, + lazy=True, + ), + ).value + is sys.executable + ) diff --git a/datasalad/settings/tests/test_source.py b/datasalad/settings/tests/test_source.py new file mode 100644 index 0000000..db88896 --- /dev/null +++ b/datasalad/settings/tests/test_source.py @@ -0,0 +1,74 @@ +from ..setting import Setting +from ..source import ( + CachingSource, + InMemory, + Source, +) + + +class DummyCachingSource(CachingSource): + def load(self): + pass + + +def test_inmemorysrc(): + mem = InMemory() + assert str(mem) == 'InMemory' + + target_key = 'dummy' + mem[target_key] = Setting('dummy') + assert mem.getall('dummy') == (Setting('dummy'),) + assert str(InMemory()) == 'InMemory' + + +def test_cachingsource(): + ds = DummyCachingSource() + ds['mike'] = Setting('one') + assert ds['mike'] == Setting('one') + assert ds.get('mike') == Setting('one') + assert str(ds) == "DummyCachingSource(mike='one')" + assert repr(ds) == ( + 'DummyCachingSource(' "{'mike': Setting('one', coercer=None, lazy=False)})" + ) + + ds.add('mike', Setting('two')) + assert ds['mike'].value == 'two' + assert ds.get('mike').value == 'two' + assert ds.getall('mike') == (Setting('one'), Setting('two')) + + assert ds.getall('nothere') == (Setting(None),) + assert ds.getall('nothere', Setting(True)) == (Setting(True),) + + ds.add('notherebefore', Setting('butnow')) + assert ds['notherebefore'].value == 'butnow' + + +def test_settings_base_default_methods(): + class DummySource(Source): + def load(self): # pragma: no cover + pass + + def reinit(self): # 
pragma: no cover + pass + + def __getitem__(self, key): # pragma: no cover + if key == 'plain': + return 'plain' + if key == 'tuple': + return ('plain',) + return + + def keys(self): + return {'mykey', 'plain', 'tuple'} + + src = DummySource() + assert 'mykey' in src + # smoke test for __iter__ + assert set(src) == src.keys() + + assert not src.is_writable + + assert src.get('plain') == 'plain' + assert src.get('tuple') == 'plain' + assert src.getall('plain') == ('plain',) + assert src.getall('tuple') == ('plain',) diff --git a/docs/index.rst b/docs/index.rst index 85b8bc8..006d9c2 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -78,6 +78,7 @@ Also see the :ref:`modindex`. runners iterable_subprocess itertools + settings Why ``datasalad``? diff --git a/pyproject.toml b/pyproject.toml index 49d9f4f..f6293a3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,7 +132,7 @@ extra-dependencies = [ ] [tool.hatch.envs.cz.scripts] check-commits = [ - # check all commit messages since we switched to convential commits + # check all commit messages since we switched to conventional commits # only (no merge commits also) "cz check --rev-range a518855b7e2a08d8a5ba6a36070425457271857b..HEAD", ]