From eff299923d3674b530d50a02b7012455285e375b Mon Sep 17 00:00:00 2001
From: Ruslan Kuprieiev
Date: Wed, 15 Mar 2023 04:44:11 +0200
Subject: [PATCH] index: data: support imports

Stepping stone to simplifying `dvc fetch/pull` by using the index.

Fetch already handles regular imports through the index, but not repo
imports, because in the current architecture their processing is much
more involved (e.g. chained imports). With `FileStorage` support
introduced into `DataIndex` and `datafs` supporting imports overall,
`dvcfs` can now handle repo imports (even chained ones). This will soon
allow us to handle repo imports the same way we handle regular ones,
improve performance, and get rid of a lot of messy code
(e.g. `DependencyRepo`).

Related https://github.com/iterative/scmrepo/issues/207
Related https://github.com/iterative/dvc-data/pull/315
Related https://github.com/iterative/studio/issues/5261
---
 dvc/dependency/repo.py | 7 +++++++
 dvc/repo/index.py      | 4 +---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/dvc/dependency/repo.py b/dvc/dependency/repo.py
index 481d67874c0..902a9811a8d 100644
--- a/dvc/dependency/repo.py
+++ b/dvc/dependency/repo.py
@@ -36,11 +36,18 @@ class RepoDependency(Dependency):
     }
 
     def __init__(self, def_repo: Dict[str, str], stage: "Stage", *args, **kwargs):
+        from dvc.fs import DVCFileSystem
+
         self.def_repo = def_repo
         self._objs: Dict[str, "HashFile"] = {}
         self._meta: Dict[str, "Meta"] = {}
         super().__init__(stage, *args, **kwargs)
 
+        self.fs = DVCFileSystem(
+            self.def_repo[self.PARAM_URL],
+            rev=self.def_repo.get(self.PARAM_REV_LOCK),
+        )
+
     def _parse_path(self, fs, fs_path):  # noqa: ARG002
         return None
 
diff --git a/dvc/repo/index.py b/dvc/repo/index.py
index 68b8fd12dce..b57166822d2 100644
--- a/dvc/repo/index.py
+++ b/dvc/repo/index.py
@@ -142,8 +142,6 @@ def _load_storage_from_out(storage_map, key, out):
     from dvc.config import NoRemoteError
     from dvc_data.index import FileStorage, ObjectStorage
 
-    if out.odb:
-        storage_map.add_data(ObjectStorage(key, out.odb))
     storage_map.add_cache(ObjectStorage(key, out.cache))
     try:
         remote = out.repo.cloud.get_remote(out.remote)
@@ -161,7 +159,7 @@ def _load_storage_from_out(storage_map, key, out):
     except NoRemoteError:
         pass
 
-    if out.stage.is_import and not out.stage.is_repo_import:
+    if out.stage.is_import:
         dep = out.stage.deps[0]
         storage_map.add_data(FileStorage(key, dep.fs, dep.fs_path))
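
Note (not part of the patch): a minimal sketch of what the new `RepoDependency.fs`
attribute enables. Only `DVCFileSystem` and its `rev` argument come from the diff
above; the repo URL, revision, and file path below are hypothetical placeholders.

    from dvc.fs import DVCFileSystem

    # Hypothetical source repo and path, used only for illustration.
    fs = DVCFileSystem("https://github.com/example/data-registry", rev="main")

    # Reads resolve through the source repo's own index, which is how the
    # FileStorage entry registered in _load_storage_from_out can serve repo
    # imports (even chained ones) without the old DependencyRepo machinery.
    with fs.open("data/file.txt") as fobj:
        contents = fobj.read()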