From ffb204755eefb2c883357141a3cd413d7d62ab10 Mon Sep 17 00:00:00 2001 From: Alexander Guschin <1aguschin@gmail.com> Date: Mon, 6 Mar 2023 20:36:15 +0600 Subject: [PATCH] Read MLEM metafiles with `description` and `labels` (#630) We deprecated them a while but were able to read them still, but not so long ago a change in MLEM made these files unreadable. Some repos still have those, such as https://github.com/iterative/demo-bank-customer-churn, which is needed to be read in Studio now. Besides, I'm switching `try_migrations=True` to automatically try to read and fix old metafile formats. @mike0sv, you initially set it to `False` - did you have any specific concerns in mind for that? cc @amritghimire @jellebouwman - once this is merged and released we need to check if the MLEM models in the repo can be read without any troubles. I'll ask your help with that @amritghimire --- mlem/api/migrations.py | 17 ++++++++++++++++- mlem/core/metadata.py | 8 ++++---- mlem/core/objects.py | 10 ++++++++-- tests/api/test_migrations.py | 30 +++++++++++++++++++++++------- 4 files changed, 51 insertions(+), 14 deletions(-) diff --git a/mlem/api/migrations.py b/mlem/api/migrations.py index afd6020f..1553c835 100644 --- a/mlem/api/migrations.py +++ b/mlem/api/migrations.py @@ -52,6 +52,18 @@ def _migrate_one(location: Location): safe_dump(payload, f) +def _migrate_to_028(meta: dict) -> Optional[dict]: + if "object_type" not in meta: + return None + + if "description" in meta: + meta.pop("description") + + if "labels" in meta: + meta.pop("labels") + return meta + + def _migrate_to_040(meta: dict) -> Optional[dict]: if "object_type" not in meta or meta["object_type"] != "model": return None @@ -67,4 +79,7 @@ def _migrate_to_040(meta: dict) -> Optional[dict]: return meta -_migrations: List[Callable[[dict], Optional[dict]]] = [_migrate_to_040] +_migrations: List[Callable[[dict], Optional[dict]]] = [ + _migrate_to_028, + _migrate_to_040, +] diff --git a/mlem/core/metadata.py b/mlem/core/metadata.py index 465cd500..c3490bef 100644 --- a/mlem/core/metadata.py +++ b/mlem/core/metadata.py @@ -138,7 +138,7 @@ def load( rev: Optional[str] = None, batch_size: Optional[int] = None, follow_links: bool = True, - try_migrations: bool = False, + try_migrations: bool = True, ) -> Any: """Load python object saved by MLEM @@ -177,7 +177,7 @@ def load_meta( follow_links: bool = True, load_value: bool = False, fs: Optional[AbstractFileSystem] = None, - try_migrations: bool = False, + try_migrations: bool = True, *, force_type: Literal[None] = None, ) -> MlemObject: @@ -192,7 +192,7 @@ def load_meta( follow_links: bool = True, load_value: bool = False, fs: Optional[AbstractFileSystem] = None, - try_migrations: bool = False, + try_migrations: bool = True, *, force_type: Optional[Type[T]] = None, ) -> T: @@ -207,7 +207,7 @@ def load_meta( follow_links: bool = True, load_value: bool = False, fs: Optional[AbstractFileSystem] = None, - try_migrations: bool = False, + try_migrations: bool = True, *, force_type: Optional[Type[T]] = None, ) -> T: diff --git a/mlem/core/objects.py b/mlem/core/objects.py index 140a4f5d..beeef7c7 100644 --- a/mlem/core/objects.py +++ b/mlem/core/objects.py @@ -171,11 +171,17 @@ def read( ) with location.open() as f: payload = safe_load(f) - if try_migrations: + try: + res = parse_obj_as(MlemObject, payload).bind(location) + except ValidationError as e: + if not try_migrations: + raise e + from mlem.api.migrations import apply_migrations payload, _ = apply_migrations(payload) - res = parse_obj_as(MlemObject, payload).bind(location) + res = parse_obj_as(MlemObject, payload).bind(location) + if follow_links and isinstance(res, MlemLink): link = res.load_link() if not isinstance(link, cls): diff --git a/tests/api/test_migrations.py b/tests/api/test_migrations.py index 54c7c0b1..e6ee94ed 100644 --- a/tests/api/test_migrations.py +++ b/tests/api/test_migrations.py @@ -6,6 +6,22 @@ from mlem.core.metadata import load_meta from mlem.core.objects import MlemModel, MlemObject +model_02 = ( + { + "object_type": "model", + "description": "machine learning should be mlemming", + "labels": ["mlemming", "it", "should", "be"], + "artifacts": {}, + "model_type": {"type": "sklearn", "methods": {"lol": {}}}, + }, + MlemModel( + artifacts={}, + call_orders={"lol": [("model", "lol")]}, + processors={"model": {"type": "sklearn", "methods": {"lol": {}}}}, + ), +) + + model_03 = ( { "object_type": "model", @@ -20,7 +36,7 @@ ) -@pytest.mark.parametrize("old_data", [model_03]) +@pytest.mark.parametrize("old_data", [model_02, model_03]) def test_single(tmpdir, old_data): path = tmpdir / "model.mlem" old_payload, new_object = old_data @@ -28,13 +44,13 @@ def test_single(tmpdir, old_data): migrate(str(path)) - meta = load_meta(path) + meta = load_meta(path, try_migrations=False) assert isinstance(meta, MlemObject) assert meta == new_object -@pytest.mark.parametrize("old_data,new_data", [model_03]) +@pytest.mark.parametrize("old_data,new_data", [model_02, model_03]) @pytest.mark.parametrize("recursive", [True, False]) def test_directory(tmpdir, old_data, new_data, recursive): subdir_path = tmpdir / "subdir" / "model.mlem" @@ -48,22 +64,22 @@ def test_directory(tmpdir, old_data, new_data, recursive): for i in range(3): path = tmpdir / f"model{i}.mlem" - meta = load_meta(path) + meta = load_meta(path, try_migrations=False) assert isinstance(meta, MlemObject) assert meta == new_data if recursive: - meta = load_meta(subdir_path) + meta = load_meta(subdir_path, try_migrations=False) assert isinstance(meta, MlemObject) assert meta == new_data else: try: - assert load_meta(subdir_path) != new_data + assert load_meta(subdir_path, try_migrations=False) != new_data except ValidationError: pass -@pytest.mark.parametrize("old_data,new_data", [model_03]) +@pytest.mark.parametrize("old_data,new_data", [model_02, model_03]) def test_load_with_migration(tmpdir, old_data, new_data): path = tmpdir / "model.mlem" path.write_text(safe_dump(old_data), encoding="utf8")