From 88438342dd12ee32e38de54b911ea5038b012d3e Mon Sep 17 00:00:00 2001 From: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> Date: Mon, 6 Jan 2025 16:32:42 +0100 Subject: [PATCH] Create standard/schema for `Models` (#131) * add schema * rename path args to `filename` * change `Models` to be a `RootModel` * update doc links; bump mkstd req --------- Co-authored-by: Daniel Weindl --- doc/problem_definition.rst | 7 +- doc/standard/make_schemas.py | 2 + doc/standard/models.yaml | 86 +++++++++++++++++++ petab_select/model.py | 20 ++--- petab_select/models.py | 162 +++++++++++++++++++++++------------ pyproject.toml | 2 +- 6 files changed, 211 insertions(+), 68 deletions(-) create mode 100644 doc/standard/models.yaml diff --git a/doc/problem_definition.rst b/doc/problem_definition.rst index 9545e30..0e2d92c 100644 --- a/doc/problem_definition.rst +++ b/doc/problem_definition.rst @@ -152,7 +152,12 @@ Brief format description Schema ^^^^^^ -The format is provided as `YAML-formatted JSON schema <_static/model.yaml>`_, which enables easy validation with various third-party tools. +The schemas are provided as YAML-formatted JSON schemas, which enables easy validation with various third-party tools. Schemas are provided for: + +- `a single model <_static/model.yaml>`_, and +- `a list of models <_static/models.yaml>`_, which is simply a YAML list of the single model format. + +Below is the schema for a single model. .. 
literalinclude:: standard/model.yaml :language: yaml diff --git a/doc/standard/make_schemas.py b/doc/standard/make_schemas.py index 8e371a1..c01c62b 100644 --- a/doc/standard/make_schemas.py +++ b/doc/standard/make_schemas.py @@ -1,3 +1,5 @@ from petab_select.model import ModelStandard +from petab_select.models import ModelsStandard ModelStandard.save_schema("model.yaml") +ModelsStandard.save_schema("models.yaml") diff --git a/doc/standard/models.yaml b/doc/standard/models.yaml new file mode 100644 index 0000000..a90f3d6 --- /dev/null +++ b/doc/standard/models.yaml @@ -0,0 +1,86 @@ +$defs: + Model: + description: "A model.\n\nSee :class:`ModelBase` for the standardized attributes.\ + \ Additional\nattributes are available in ``Model`` to improve usability.\n\n\ + Attributes:\n _model_subspace_petab_problem:\n The PEtab problem of\ + \ the model subspace of this model.\n If not provided, this is reconstructed\ + \ from\n :attr:`model_subspace_petab_yaml`." + properties: + model_subspace_id: + title: Model Subspace Id + type: string + model_subspace_indices: + items: + type: integer + title: Model Subspace Indices + type: array + criteria: + additionalProperties: + type: number + title: Criteria + type: object + model_hash: + $ref: '#/$defs/ModelHash' + default: null + model_subspace_petab_yaml: + anyOf: + - format: path + type: string + - type: 'null' + title: Model Subspace Petab Yaml + estimated_parameters: + anyOf: + - additionalProperties: + type: number + type: object + - type: 'null' + default: null + title: Estimated Parameters + iteration: + anyOf: + - type: integer + - type: 'null' + default: null + title: Iteration + model_id: + default: null + title: Model Id + type: string + parameters: + additionalProperties: + anyOf: + - type: number + - type: integer + - const: estimate + type: string + title: Parameters + type: object + predecessor_model_hash: + $ref: '#/$defs/ModelHash' + default: null + required: + - model_subspace_id + - model_subspace_indices + - 
model_subspace_petab_yaml + - parameters + title: Model + type: object + ModelHash: + type: string +description: 'A collection of models. + + + Provide a PEtab Select ``problem`` to the constructor or via + + ``set_problem``, to use add models by hashes. This means that all models + + must belong to the same PEtab Select problem. + + + This permits both ``list`` and ``dict`` operations -- see + + :class:``ListDict`` for further details.' +items: + $ref: '#/$defs/Model' +title: Models +type: array diff --git a/petab_select/model.py b/petab_select/model.py index ae92df8..737e281 100644 --- a/petab_select/model.py +++ b/petab_select/model.py @@ -362,21 +362,21 @@ def _fix_predecessor_model_hash(self: ModelBase) -> ModelBase: def to_yaml( self, - yaml_path: str | Path, + filename: str | Path, ) -> None: """Save a model to a YAML file. - All paths will be made relative to the ``yaml_path`` directory. + All paths will be made relative to the ``filename`` directory. Args: - yaml_path: - The model YAML file location. + filename: + Location of the YAML file. """ - root_path = Path(yaml_path).parent + root_path = Path(filename).parent model = copy.deepcopy(self) model.set_relative_paths(root_path=root_path) - ModelStandard.save_data(data=model, filename=yaml_path) + ModelStandard.save_data(data=model, filename=filename) def set_relative_paths(self, root_path: str | Path) -> None: """Change all paths to be relative to ``root_path``.""" @@ -686,16 +686,16 @@ def get_parameter_values( @staticmethod def from_yaml( - yaml_path: str | Path, + filename: str | Path, ) -> Model: """Load a model from a YAML file. Args: - yaml_path: - The model YAML file location. + filename: + Location of the YAML file. 
""" model = ModelStandard.load_data( - filename=yaml_path, root_path=yaml_path.parent + filename=filename, root_path=Path(filename).parent ) return model diff --git a/petab_select/models.py b/petab_select/models.py index 6e770d3..a681157 100644 --- a/petab_select/models.py +++ b/petab_select/models.py @@ -1,14 +1,23 @@ from __future__ import annotations +import copy import warnings from collections import Counter from collections.abc import Iterable, MutableSequence from pathlib import Path from typing import TYPE_CHECKING, Any, TypeAlias +import mkstd import numpy as np import pandas as pd -import yaml +from pydantic import ( + Field, + PrivateAttr, + RootModel, + ValidationInfo, + ValidatorFunctionWrapHandler, + model_validator, +) from .constants import ( CRITERIA, @@ -42,14 +51,15 @@ ModelIndex: TypeAlias = int | ModelHash | slice | str | Iterable __all__ = [ - "ListDict", + "_ListDict", "Models", "models_from_yaml_list", "models_to_yaml_list", + "ModelsStandard", ] -class ListDict(MutableSequence): +class _ListDict(RootModel, MutableSequence): """Acts like a ``list`` and a ``dict``. Not all methods are implemented -- feel free to request anything that you @@ -73,18 +83,63 @@ class ListDict(MutableSequence): _hashes: The list of metadata (dictionary keys) (model hashes). _problem: + The PEtab Select problem. 
""" - def __init__( - self, models: Iterable[ModelLike] = None, problem: Problem = None + root: list[Model] = Field(default_factory=list) + _hashes: list[ModelHash] = PrivateAttr(default_factory=list) + _problem: Problem | None = PrivateAttr(default=None) + + @model_validator(mode="wrap") + def _check_kwargs( + kwargs: dict[str, list[ModelLike] | Problem] | list[ModelLike], + handler: ValidatorFunctionWrapHandler, + info: ValidationInfo, ) -> Models: - self._models = [] - self._hashes = [] - self._problem = problem + """Handle `Models` creation from different sources.""" + _models = [] + _problem = None + if isinstance(kwargs, list): + _models = kwargs + elif isinstance(kwargs, dict): + # Identify the argument with the models + if "models" in kwargs and "root" in kwargs: + raise ValueError("Provide only one of `root` and `models`.") + _models = kwargs.get("models") or kwargs.get("root") or [] + + # Identify the argument with the PEtab Select problem + if "problem" in kwargs and "_problem" in kwargs: + raise ValueError( + "Provide only one of `problem` and `_problem`." + ) + _problem = kwargs.get("problem") or kwargs.get("_problem") + + # Distribute model constructor kwargs to each model dict + if model_kwargs := kwargs.get("model_kwargs"): + for _model_index, _model in enumerate(_models): + if not isinstance(_model, dict): + raise ValueError( + "`model_kwargs` are only intended to be used when " + "constructing models from a YAML file." + ) + _models[_model_index] = {**_model, **model_kwargs} + + models = handler(_models) + models._problem = _problem + return models + + @model_validator(mode="after") + def _check_typing(self: RootModel) -> RootModel: + """Fix model typing.""" + models0 = self._models + self.root = [] + # This also converts all model hashes into models. 
+ self.extend(models0) + return self - if models is None: - models = [] - self.extend(models) + @property + def _models(self) -> list[Model]: + return self.root def __repr__(self) -> str: """Get the model hashes that can regenerate these models. @@ -97,7 +152,7 @@ def __repr__(self) -> str: # skipped __lt__, __le__ def __eq__(self, other) -> bool: - other_hashes = Models(other)._hashes + other_hashes = Models(models=other)._hashes same_length = len(self._hashes) == len(other_hashes) same_hashes = set(self._hashes) == set(other_hashes) return same_length and same_hashes @@ -253,14 +308,16 @@ def __add__( new_models = [self._problem.model_hash_to_model(other)] case Iterable(): # Assumes the models belong to the same PEtab Select problem. - new_models = Models(other, problem=self._problem)._models + new_models = Models( + models=other, _problem=self._problem + )._models case _: raise TypeError(f"Unexpected type: `{type(other)}`.") models = self._models + new_models if not left: models = new_models + self._models - return Models(models=models, problem=self._problem) + return Models(models=models, _problem=self._problem) def __radd__(self, other: ModelLike | ModelsLike) -> Models: return self.__add__(other=other, left=False) @@ -271,7 +328,7 @@ def __iadd__(self, other: ModelLike | ModelsLike) -> Models: # skipped __mul__, __rmul__, __imul__ def __copy__(self) -> Models: - return Models(models=self._models, problem=self._problem) + return Models(models=self._models, _problem=self._problem) def append(self, item: ModelLike) -> None: self._update(index=len(self), item=item) @@ -307,7 +364,11 @@ def extend(self, other: Iterable[ModelLike]) -> None: for model_like in other: self.append(model_like) - # __iter__/__next__? Not in UserList... + def __iter__(self): + return iter(self._models) + + def __next__(self): + raise NotImplementedError # `dict` methods. 
@@ -331,7 +392,7 @@ def values(self) -> Models: return self -class Models(ListDict): +class Models(_ListDict): """A collection of models. Provide a PEtab Select ``problem`` to the constructor or via @@ -364,70 +425,56 @@ def lint(self): @staticmethod def from_yaml( - models_yaml: TYPE_PATH, + filename: TYPE_PATH, petab_problem: petab.Problem = None, problem: Problem = None, ) -> Models: - """Generate models from a PEtab Select list of model YAML file. + """Load models from a YAML file. Args: - models_yaml: - The path to the PEtab Select list of model YAML file. + filename: + Location of the YAML file. petab_problem: - Provide a preloaded copy of the PEtab problem. Note: + Provide a preloaded copy of the PEtab problem. N.B.: all models should share the same PEtab problem if this is provided. problem: - The PEtab Select problem. + The PEtab Select problem. N.B.: all models should belong to the + same PEtab Select problem if this is provided. Returns: The models. """ - with open(str(models_yaml)) as f: - model_dict_list = yaml.safe_load(f) - if not model_dict_list: - # Empty file - models = [] - elif isinstance(model_dict_list, dict): - # File contains a single model - model_dict_list = [model_dict_list] - - models = [ - Model.model_validate( - { - **model_dict, - ROOT_PATH: Path(models_yaml).parent, - MODEL_SUBSPACE_PETAB_PROBLEM: petab_problem, - } - ) - for model_dict in model_dict_list - ] - - return Models(models=models, problem=problem) + return ModelsStandard.load_data( + filename=filename, + _problem=problem, + model_kwargs={ + ROOT_PATH: Path(filename).parent, + MODEL_SUBSPACE_PETAB_PROBLEM: petab_problem, + }, + ) def to_yaml( self, - output_yaml: TYPE_PATH, + filename: TYPE_PATH, relative_paths: bool = True, ) -> None: - """Generate a YAML listing of models. + """Save models to a YAML file. Args: - output_yaml: - The location where the YAML will be saved. + filename: + Location of the YAML file. 
relative_paths: Whether to rewrite the paths in each model (e.g. the path to the model's PEtab problem) relative to the `output_yaml` location. """ - paths_relative_to = None + models = self._models if relative_paths: - paths_relative_to = Path(output_yaml).parent - model_dicts = [ - model.to_dict(paths_relative_to=paths_relative_to) - for model in self - ] - with open(output_yaml, "w") as f: - yaml.safe_dump(model_dicts, f) + root_path = Path(filename).parent + models = copy.deepcopy(models) + for model in models: + model.set_relative_paths(root_path=root_path) + ModelsStandard.save_data(data=models, filename=filename) def get_criterion( self, @@ -573,3 +620,6 @@ def models_to_yaml_list( Models(models=models).to_yaml( - output_yaml=output_yaml, relative_paths=relative_paths + filename=output_yaml, relative_paths=relative_paths ) + + +ModelsStandard = mkstd.YamlStandard(model=Models) diff --git a/pyproject.toml b/pyproject.toml index 12d1afa..7043546 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "pyyaml>=6.0.2", "click>=8.1.7", "dill>=0.3.9", - "mkstd>=0.0.5", + "mkstd>=0.0.7", ] [project.optional-dependencies] plot = [