# Patch summary (preamble text before the first "diff --git" header):
#
# * dvc/repo/gc.py: inside gc(), keep a `seen_odbs` set while iterating
#   `self.cache.by_scheme()`; an ODB that is falsy or already in the set is
#   skipped, so `ogc(...)` is called at most once per distinct ODB.
# * pyproject.toml: change the dvc-data requirement from
#   ">=3.17.0,<3.18" to ">=3.18.0,<3.19.0".
# * tests/func/test_gc.py:
#     - test_gc_logging now requires exactly one of the "Removed 3 objects
#       from repo cache." / "Removed 3 objects from local cache." messages
#       (XOR), and still requires the 'legacy' "No unused" message.
#     - new test_gc_dry_logs_paths adds three objects to `dvc.cache.local`,
#       runs `dvc.gc(workspace=True, dry=True)` while capturing INFO records
#       of the "dvc_data.hashfile.gc" logger, and expects a
#       "Removing <path>" message for each object.
#
# NOTE(review): the line structure of this patch was reconstructed from the
# hunk headers; the indentation of the pyproject.toml context lines is
# assumed -- confirm against the target file before applying.
diff --git a/dvc/repo/gc.py b/dvc/repo/gc.py
index e323f7912a..a499a1a61d 100644
--- a/dvc/repo/gc.py
+++ b/dvc/repo/gc.py
@@ -128,9 +128,11 @@ def gc( # noqa: C901, PLR0912, PLR0913
     used_obj_ids = set()
     used_obj_ids.update(*odb_to_obj_ids.values())
 
+    seen_odbs = set()
     for scheme, odb in self.cache.by_scheme():
-        if not odb:
+        if not odb or odb in seen_odbs:
             continue
+        seen_odbs.add(odb)
         num_removed = ogc(odb, used_obj_ids, jobs=jobs, dry=dry)
         if num_removed:
             logger.info("Removed %d objects from %s cache.", num_removed, scheme)
diff --git a/pyproject.toml b/pyproject.toml
index 931dbecdee..6b4ee30d87 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -41,7 +41,7 @@ dependencies = [
   "distro>=1.3",
   "dpath<3,>=2.1.0",
   "dulwich",
-  "dvc-data>=3.17.0,<3.18",
+  "dvc-data>=3.18.0,<3.19.0",
   "dvc-http>=2.29.0",
   "dvc-objects",
   "dvc-render>=1.0.1,<2",
diff --git a/tests/func/test_gc.py b/tests/func/test_gc.py
index b4f6af2ccd..4aa8eb7707 100644
--- a/tests/func/test_gc.py
+++ b/tests/func/test_gc.py
@@ -437,8 +437,16 @@ def test_gc_logging(caplog, dvc, good_and_bad_cache):
     with caplog.at_level(logging.INFO, logger="dvc"):
         dvc.gc(workspace=True)
 
-    assert "Removed 3 objects from repo cache." in caplog.text
-    assert "No unused 'local' cache to remove." in caplog.text
+    # Check that 3 objects were removed from either repo or local cache
+    # (they point to the same ODB, so only one will be logged due to deduplication)
+    has_repo_log = "Removed 3 objects from repo cache." in caplog.text
+    has_local_log = "Removed 3 objects from local cache." in caplog.text
+
+    # Exactly one of repo or local should have the log, not both (deduplication)
+    assert has_repo_log ^ has_local_log, (
+        "Should have exactly one log for repo/local cache"
+    )
+
     assert "No unused 'legacy' cache to remove." in caplog.text
 
 
@@ -450,3 +458,21 @@ def test_gc_skip_failed(tmp_dir, dvc):
 
     dvc.gc(force=True, workspace=True)
     dvc.gc(force=True, workspace=True, skip_failed=True)
+
+
+def test_gc_dry_logs_paths(caplog, tmp_dir, dvc):
+    """Test that dry run logs paths of objects to be removed."""
+    odb = dvc.cache.local
+
+    # Add some unused objects directly to cache
+    unused_hashes = ["test123", "test456", "test789"]
+    for hash_val in unused_hashes:
+        odb.add_bytes(hash_val, hash_val.encode("utf8"))
+
+    with caplog.at_level(logging.INFO, logger="dvc_data.hashfile.gc"):
+        dvc.gc(workspace=True, dry=True)
+
+    # Verify that paths are logged by dvc-data layer
+    for hash_val in unused_hashes:
+        expected_path = odb.oid_to_path(hash_val)
+        assert f"Removing {expected_path}" in caplog.text