Skip to content
This repository has been archived by the owner on Sep 13, 2023. It is now read-only.

Commit

Permalink
Read MLEM metafiles with description and labels (#630)
Browse files Browse the repository at this point in the history
We deprecated them a while but were able to read them still, but not so
long ago a change in MLEM made these files unreadable. Some repos still
have those, such as
https://github.com/iterative/demo-bank-customer-churn, which is needed
to be read in Studio now.

Besides, I'm switching `try_migrations=True` to automatically try to
read and fix old metafile formats. @mike0sv, you initially set it to
`False` - did you have any specific concerns in mind for that?

cc @amritghimire @jellebouwman - once this is merged and released we
need to check if the MLEM models in the repo can be read without any
troubles. I'll ask your help with that @amritghimire
  • Loading branch information
aguschin authored Mar 6, 2023
1 parent 92782f5 commit ffb2047
Show file tree
Hide file tree
Showing 4 changed files with 51 additions and 14 deletions.
17 changes: 16 additions & 1 deletion mlem/api/migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,18 @@ def _migrate_one(location: Location):
safe_dump(payload, f)


def _migrate_to_028(meta: dict) -> Optional[dict]:
if "object_type" not in meta:
return None

if "description" in meta:
meta.pop("description")

if "labels" in meta:
meta.pop("labels")
return meta


def _migrate_to_040(meta: dict) -> Optional[dict]:
if "object_type" not in meta or meta["object_type"] != "model":
return None
Expand All @@ -67,4 +79,7 @@ def _migrate_to_040(meta: dict) -> Optional[dict]:
return meta


_migrations: List[Callable[[dict], Optional[dict]]] = [_migrate_to_040]
_migrations: List[Callable[[dict], Optional[dict]]] = [
_migrate_to_028,
_migrate_to_040,
]
8 changes: 4 additions & 4 deletions mlem/core/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def load(
rev: Optional[str] = None,
batch_size: Optional[int] = None,
follow_links: bool = True,
try_migrations: bool = False,
try_migrations: bool = True,
) -> Any:
"""Load python object saved by MLEM
Expand Down Expand Up @@ -177,7 +177,7 @@ def load_meta(
follow_links: bool = True,
load_value: bool = False,
fs: Optional[AbstractFileSystem] = None,
try_migrations: bool = False,
try_migrations: bool = True,
*,
force_type: Literal[None] = None,
) -> MlemObject:
Expand All @@ -192,7 +192,7 @@ def load_meta(
follow_links: bool = True,
load_value: bool = False,
fs: Optional[AbstractFileSystem] = None,
try_migrations: bool = False,
try_migrations: bool = True,
*,
force_type: Optional[Type[T]] = None,
) -> T:
Expand All @@ -207,7 +207,7 @@ def load_meta(
follow_links: bool = True,
load_value: bool = False,
fs: Optional[AbstractFileSystem] = None,
try_migrations: bool = False,
try_migrations: bool = True,
*,
force_type: Optional[Type[T]] = None,
) -> T:
Expand Down
10 changes: 8 additions & 2 deletions mlem/core/objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,17 @@ def read(
)
with location.open() as f:
payload = safe_load(f)
if try_migrations:
try:
res = parse_obj_as(MlemObject, payload).bind(location)
except ValidationError as e:
if not try_migrations:
raise e

from mlem.api.migrations import apply_migrations

payload, _ = apply_migrations(payload)
res = parse_obj_as(MlemObject, payload).bind(location)
res = parse_obj_as(MlemObject, payload).bind(location)

if follow_links and isinstance(res, MlemLink):
link = res.load_link()
if not isinstance(link, cls):
Expand Down
30 changes: 23 additions & 7 deletions tests/api/test_migrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,22 @@
from mlem.core.metadata import load_meta
from mlem.core.objects import MlemModel, MlemObject

model_02 = (
{
"object_type": "model",
"description": "machine learning should be mlemming",
"labels": ["mlemming", "it", "should", "be"],
"artifacts": {},
"model_type": {"type": "sklearn", "methods": {"lol": {}}},
},
MlemModel(
artifacts={},
call_orders={"lol": [("model", "lol")]},
processors={"model": {"type": "sklearn", "methods": {"lol": {}}}},
),
)


model_03 = (
{
"object_type": "model",
Expand All @@ -20,21 +36,21 @@
)


@pytest.mark.parametrize("old_data", [model_03])
@pytest.mark.parametrize("old_data", [model_02, model_03])
def test_single(tmpdir, old_data):
path = tmpdir / "model.mlem"
old_payload, new_object = old_data
path.write_text(safe_dump(old_payload), encoding="utf8")

migrate(str(path))

meta = load_meta(path)
meta = load_meta(path, try_migrations=False)

assert isinstance(meta, MlemObject)
assert meta == new_object


@pytest.mark.parametrize("old_data,new_data", [model_03])
@pytest.mark.parametrize("old_data,new_data", [model_02, model_03])
@pytest.mark.parametrize("recursive", [True, False])
def test_directory(tmpdir, old_data, new_data, recursive):
subdir_path = tmpdir / "subdir" / "model.mlem"
Expand All @@ -48,22 +64,22 @@ def test_directory(tmpdir, old_data, new_data, recursive):

for i in range(3):
path = tmpdir / f"model{i}.mlem"
meta = load_meta(path)
meta = load_meta(path, try_migrations=False)
assert isinstance(meta, MlemObject)
assert meta == new_data

if recursive:
meta = load_meta(subdir_path)
meta = load_meta(subdir_path, try_migrations=False)
assert isinstance(meta, MlemObject)
assert meta == new_data
else:
try:
assert load_meta(subdir_path) != new_data
assert load_meta(subdir_path, try_migrations=False) != new_data
except ValidationError:
pass


@pytest.mark.parametrize("old_data,new_data", [model_03])
@pytest.mark.parametrize("old_data,new_data", [model_02, model_03])
def test_load_with_migration(tmpdir, old_data, new_data):
path = tmpdir / "model.mlem"
path.write_text(safe_dump(old_data), encoding="utf8")
Expand Down

0 comments on commit ffb2047

Please sign in to comment.