From 05bf90477f41cde016ee16e52b7dff5f34b1ad91 Mon Sep 17 00:00:00 2001 From: Sami Jawhar Date: Wed, 3 Dec 2025 03:48:51 +0000 Subject: [PATCH 1/3] Add optional bulk_remote_exists --- dvc/repo/data.py | 42 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 8 deletions(-) diff --git a/dvc/repo/data.py b/dvc/repo/data.py index b29b86f51e..1b95634f89 100644 --- a/dvc/repo/data.py +++ b/dvc/repo/data.py @@ -456,15 +456,41 @@ def _onerror(entry, exc): n += 1 cb.set_size(n) - for key, entry in entries.items(): - k = (*key, "") if entry.meta and entry.meta.isdir else key + entries_list = list(entries.values()) + if hasattr(storage_map, "bulk_remote_exists"): try: - if not storage_map.remote_exists(entry, refresh=remote_refresh): - missing_entries.append(os.path.sep.join(k)) - except StorageKeyError: - pass - finally: - cb.relative_update() + results = storage_map.bulk_remote_exists( + entries_list, refresh=remote_refresh + ) + for key, entry in entries.items(): + k = (*key, "") if entry.meta and entry.meta.isdir else key + try: + if not results.get(entry, True): + missing_entries.append(os.path.sep.join(k)) + except StorageKeyError: + pass + finally: + cb.relative_update() + except Exception: + for key, entry in entries.items(): + k = (*key, "") if entry.meta and entry.meta.isdir else key + try: + if not storage_map.remote_exists(entry, refresh=remote_refresh): + missing_entries.append(os.path.sep.join(k)) + except StorageKeyError: + pass + finally: + cb.relative_update() + else: + for key, entry in entries.items(): + k = (*key, "") if entry.meta and entry.meta.isdir else key + try: + if not storage_map.remote_exists(entry, refresh=remote_refresh): + missing_entries.append(os.path.sep.join(k)) + except StorageKeyError: + pass + finally: + cb.relative_update() data_index.onerror = orig_data_index_onerror return missing_entries From 08cadc60451beac2b937d61248219d571658d0cd Mon Sep 17 00:00:00 2001 From: Falko Galperin Date: Sun, 7 Dec 2025 00:55:34 +0100 Subject: [PATCH 2/3] fix: correctly update progress bar for bulk remote exist queries This also removes the general `except` block and uses fail-closed behavior for missing entries. --- dvc/repo/data.py | 44 ++++++++++---------------------------------- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/dvc/repo/data.py b/dvc/repo/data.py index 1b95634f89..32415fd7e6 100644 --- a/dvc/repo/data.py +++ b/dvc/repo/data.py @@ -456,41 +456,17 @@ def _onerror(entry, exc): n += 1 cb.set_size(n) - entries_list = list(entries.values()) - if hasattr(storage_map, "bulk_remote_exists"): + results = storage_map.bulk_remote_exists( + list(entries.values()), refresh=remote_refresh, callback=cb + ) + for key, entry in entries.items(): + k = (*key, "") if entry.meta and entry.meta.isdir else key try: - results = storage_map.bulk_remote_exists( - entries_list, refresh=remote_refresh - ) - for key, entry in entries.items(): - k = (*key, "") if entry.meta and entry.meta.isdir else key - try: - if not results.get(entry, True): - missing_entries.append(os.path.sep.join(k)) - except StorageKeyError: - pass - finally: - cb.relative_update() - except Exception: - for key, entry in entries.items(): - k = (*key, "") if entry.meta and entry.meta.isdir else key - try: - if not storage_map.remote_exists(entry, refresh=remote_refresh): - missing_entries.append(os.path.sep.join(k)) - except StorageKeyError: - pass - finally: - cb.relative_update() - else: - for key, entry in entries.items(): - k = (*key, "") if entry.meta and entry.meta.isdir else key - try: - if not storage_map.remote_exists(entry, refresh=remote_refresh): - missing_entries.append(os.path.sep.join(k)) - except StorageKeyError: - pass - finally: - cb.relative_update() + if not results.get(entry, False): + missing_entries.append(os.path.sep.join(k)) + except StorageKeyError: + pass + data_index.onerror = orig_data_index_onerror return missing_entries From abacdf96b2d1ade72cd20c38893597b793287296 Mon Sep 17 00:00:00 2001 From: skshetry <18718008+skshetry@users.noreply.github.com> Date: Mon, 22 Dec 2025 13:07:45 +0545 Subject: [PATCH 3/3] Update dvc-data version to 3.17.0 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a15f1e33e4..0518dc1e65 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ dependencies = [ "distro>=1.3", "dpath<3,>=2.1.0", "dulwich", - "dvc-data>=3.16.2,<3.17", + "dvc-data>=3.17.0,<3.18", "dvc-http>=2.29.0", "dvc-objects", "dvc-render>=1.0.1,<2",