Skip to content

Commit 7319ea4

Browse files
mariosasko and Wauplin authored
Deprecate HfApi.list_files_info (#1910)
* Deprecate `HfApi.list_files_info`
* Style
* Fix test
* Mypy fix
* Add missing import
* Last fix :)
* Apply suggestions from code review

Co-authored-by: Lucain <lucainp@gmail.com>

* Style

---------

Co-authored-by: Lucain <lucainp@gmail.com>
1 parent 8d917aa commit 7319ea4

File tree

3 files changed

+27
-7
lines changed

3 files changed

+27
-7
lines changed

src/huggingface_hub/_commit_api.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,7 @@
3939

4040
UploadMode = Literal["lfs", "regular"]
4141

42-
# Max is 1,000 per request on the Hub for HfApi.list_files_info
42+
# Max is 1,000 per request on the Hub for HfApi.get_paths_info
4343
# Otherwise we get:
4444
# HfHubHTTPError: 413 Client Error: Payload Too Large for url: https://huggingface.co/api/datasets/xxx (Request ID: xxx)\n\ntoo many parameters
4545
# See https://github.com/huggingface/huggingface_hub/issues/1503
@@ -555,21 +555,23 @@ def _fetch_lfs_files_to_copy(
555555
[`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
556556
If the Hub API response is improperly formatted.
557557
"""
558-
from .hf_api import HfApi
558+
from .hf_api import HfApi, RepoFolder
559559

560560
hf_api = HfApi(endpoint=endpoint, token=token)
561561
files_to_copy = {}
562562
for src_revision, operations in groupby(copies, key=lambda op: op.src_revision):
563563
operations = list(operations) # type: ignore
564564
paths = [op.src_path_in_repo for op in operations]
565565
for offset in range(0, len(paths), FETCH_LFS_BATCH_SIZE):
566-
src_repo_files = hf_api.list_files_info(
566+
src_repo_files = hf_api.get_paths_info(
567567
repo_id=repo_id,
568568
paths=paths[offset : offset + FETCH_LFS_BATCH_SIZE],
569569
revision=src_revision or revision,
570570
repo_type=repo_type,
571571
)
572572
for src_repo_file in src_repo_files:
573+
if isinstance(src_repo_file, RepoFolder):
574+
raise NotImplementedError("Copying a folder is not implemented.")
573575
if not src_repo_file.lfs:
574576
raise NotImplementedError("Copying a non-LFS file is not implemented")
575577
files_to_copy[(src_repo_file.rfilename, src_revision)] = src_repo_file

src/huggingface_hub/hf_api.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -130,6 +130,7 @@
130130
validate_hf_hub_args,
131131
)
132132
from .utils import tqdm as hf_tqdm
133+
from .utils._deprecation import _deprecate_method
133134
from .utils._typing import CallableT
134135
from .utils.endpoint_helpers import (
135136
DatasetFilter,
@@ -2376,6 +2377,7 @@ def file_exists(
23762377
return False
23772378

23782379
@validate_hf_hub_args
2380+
@_deprecate_method(version="0.23", message="Use `list_repo_tree` and `get_paths_info` instead.")
23792381
def list_files_info(
23802382
self,
23812383
repo_id: str,
@@ -2588,9 +2590,10 @@ def list_repo_files(
25882590
"""
25892591
return [
25902592
f.rfilename
2591-
for f in self.list_files_info(
2592-
repo_id=repo_id, paths=None, revision=revision, repo_type=repo_type, token=token
2593+
for f in self.list_repo_tree(
2594+
repo_id=repo_id, recursive=True, revision=revision, repo_type=repo_type, token=token
25932595
)
2596+
if isinstance(f, RepoFile)
25942597
]
25952598

25962599
@validate_hf_hub_args

tests/test_hf_api.py

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@
102102
DUMMY_MODEL_ID,
103103
DUMMY_MODEL_ID_REVISION_ONE_SPECIFIC_COMMIT,
104104
SAMPLE_DATASET_IDENTIFIER,
105+
expect_deprecation,
105106
repo_name,
106107
require_git_lfs,
107108
rmtree_with_retry,
@@ -1003,7 +1004,7 @@ def test_commit_copy_file(self, repo_url: RepoUrl) -> None:
10031004
self.assertIn("lfs Copy (1).bin", repo_files)
10041005

10051006
# Check same LFS file
1006-
repo_file1, repo_file2 = self._api.list_files_info(repo_id=repo_id, paths=["lfs.bin", "lfs Copy.bin"])
1007+
repo_file1, repo_file2 = self._api.get_paths_info(repo_id=repo_id, paths=["lfs.bin", "lfs Copy.bin"])
10071008
self.assertEqual(repo_file1.lfs["sha256"], repo_file2.lfs["sha256"])
10081009

10091010
@use_tmp_repo()
@@ -1161,6 +1162,7 @@ def setUpClass(cls):
11611162
def tearDownClass(cls):
11621163
cls._api.delete_repo(repo_id=cls.repo_id)
11631164

1165+
@expect_deprecation("list_files_info")
11641166
def test_get_regular_file_info(self):
11651167
files = list(self._api.list_files_info(repo_id=self.repo_id, paths="file.md"))
11661168
self.assertEqual(len(files), 1)
@@ -1171,6 +1173,7 @@ def test_get_regular_file_info(self):
11711173
self.assertEqual(file.size, 4)
11721174
self.assertEqual(file.blob_id, "6320cd248dd8aeaab759d5871f8781b5c0505172")
11731175

1176+
@expect_deprecation("list_files_info")
11741177
def test_get_lfs_file_info(self):
11751178
files = list(self._api.list_files_info(repo_id=self.repo_id, paths="lfs.bin"))
11761179
self.assertEqual(len(files), 1)
@@ -1188,34 +1191,41 @@ def test_get_lfs_file_info(self):
11881191
self.assertEqual(file.size, 4)
11891192
self.assertEqual(file.blob_id, "0a828055346279420bd02a4221c177bbcdc045d8")
11901193

1194+
@expect_deprecation("list_files_info")
11911195
def test_list_files(self):
11921196
files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["file.md", "lfs.bin", "2/file_2.md"]))
11931197
self.assertEqual(len(files), 3)
11941198
self.assertEqual({f.path for f in files}, {"file.md", "lfs.bin", "2/file_2.md"})
11951199

1200+
@expect_deprecation("list_files_info")
11961201
def test_list_files_and_folder(self):
11971202
files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["file.md", "lfs.bin", "2"]))
11981203
self.assertEqual(len(files), 3)
11991204
self.assertEqual({f.path for f in files}, {"file.md", "lfs.bin", "2/file_2.md"})
12001205

1206+
@expect_deprecation("list_files_info")
12011207
def test_list_unknown_path_among_other(self):
12021208
files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["file.md", "unknown"]))
12031209
self.assertEqual(len(files), 1)
12041210

1211+
@expect_deprecation("list_files_info")
12051212
def test_list_unknown_path_alone(self):
12061213
files = list(self._api.list_files_info(repo_id=self.repo_id, paths="unknown"))
12071214
self.assertEqual(len(files), 0)
12081215

1216+
@expect_deprecation("list_files_info")
12091217
def test_list_folder_flat(self):
12101218
files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["2"]))
12111219
self.assertEqual(len(files), 1)
12121220
self.assertEqual(files[0].path, "2/file_2.md")
12131221

1222+
@expect_deprecation("list_files_info")
12141223
def test_list_folder_recursively(self):
12151224
files = list(self._api.list_files_info(repo_id=self.repo_id, paths=["1"]))
12161225
self.assertEqual(len(files), 2)
12171226
self.assertEqual({f.path for f in files}, {"1/2/file_1_2.md", "1/file_1.md"})
12181227

1228+
@expect_deprecation("list_files_info")
12191229
def test_list_repo_files_manually(self):
12201230
files = list(self._api.list_files_info(repo_id=self.repo_id))
12211231
self.assertEqual(len(files), 7)
@@ -1224,22 +1234,26 @@ def test_list_repo_files_manually(self):
12241234
{".gitattributes", "1/2/file_1_2.md", "1/file_1.md", "2/file_2.md", "3/file_3.md", "file.md", "lfs.bin"},
12251235
)
12261236

1237+
@expect_deprecation("list_files_info")
12271238
def test_list_repo_files_alias(self):
12281239
self.assertEqual(
1229-
set(self._api.list_repo_files(repo_id=self.repo_id)),
1240+
set(f.path for f in self._api.list_files_info(repo_id=self.repo_id)),
12301241
{".gitattributes", "1/2/file_1_2.md", "1/file_1.md", "2/file_2.md", "3/file_3.md", "file.md", "lfs.bin"},
12311242
)
12321243

1244+
@expect_deprecation("list_files_info")
12331245
def test_list_with_root_path_is_ignored(self):
12341246
# must use `paths=None`
12351247
files = list(self._api.list_files_info(repo_id=self.repo_id, paths="/"))
12361248
self.assertEqual(len(files), 0)
12371249

1250+
@expect_deprecation("list_files_info")
12381251
def test_list_with_empty_path_is_invalid(self):
12391252
# must use `paths=None`
12401253
with self.assertRaises(BadRequestError):
12411254
list(self._api.list_files_info(repo_id=self.repo_id, paths=""))
12421255

1256+
@expect_deprecation("list_files_info")
12431257
@with_production_testing
12441258
def test_list_files_with_expand(self):
12451259
files = list(
@@ -1259,6 +1273,7 @@ def test_list_files_with_expand(self):
12591273
self.assertTrue(vae_model.security["safe"])
12601274
self.assertTrue(isinstance(vae_model.security["av_scan"], dict)) # all details in here
12611275

1276+
@expect_deprecation("list_files_info")
12621277
@with_production_testing
12631278
def test_list_files_without_expand(self):
12641279
files = list(

0 commit comments

Comments
 (0)