Skip to content

Commit

Permalink
Fix huggingface filesystem repo_type not forwarded (#1791)
Browse files: browse the repository at this point in the history
  • Loading branch information
Wauplin authored Oct 31, 2023
1 parent 7b50c1b commit e0e2c16
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 11 deletions.
1 change: 1 addition & 0 deletions src/huggingface_hub/hf_file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ def _fetch_range(self, start: int, end: int) -> bytes:
repo_id=self.resolved_path.repo_id,
revision=self.resolved_path.revision,
filename=self.resolved_path.path_in_repo,
+repo_type=self.resolved_path.repo_type,
endpoint=self.fs.endpoint,
)
r = http_backoff("GET", url, headers=headers)
Expand Down
22 changes: 11 additions & 11 deletions tests/test_hf_file_system.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,10 @@
import fsspec
import pytest

-from huggingface_hub.constants import REPO_TYPES_URL_PREFIXES
from huggingface_hub.hf_file_system import HfFileSystem
from huggingface_hub.utils import RepositoryNotFoundError, RevisionNotFoundError

-from .testing_constants import ENDPOINT_STAGING, TOKEN, USER
+from .testing_constants import ENDPOINT_STAGING, TOKEN
from .testing_utils import repo_name


Expand All @@ -22,36 +21,37 @@ def setUpClass(cls):
fsspec.register_implementation(HfFileSystem.protocol, HfFileSystem)

def setUp(self):
-self.repo_id = f"{USER}/{repo_name()}"
-self.repo_type = "dataset"
-self.hf_path = REPO_TYPES_URL_PREFIXES.get(self.repo_type, "") + self.repo_id
self.hffs = HfFileSystem(endpoint=ENDPOINT_STAGING, token=TOKEN)
self.api = self.hffs._api

# Create dummy repo
-self.api.create_repo(self.repo_id, repo_type=self.repo_type)
+repo_url = self.api.create_repo(repo_name(), repo_type="dataset")
+self.repo_id = repo_url.repo_id
+self.hf_path = f"datasets/{self.repo_id}"

# Upload files
self.api.upload_file(
path_or_fileobj=b"dummy binary data on pr",
path_in_repo="data/binary_data_for_pr.bin",
repo_id=self.repo_id,
-repo_type=self.repo_type,
+repo_type="dataset",
create_pr=True,
)
self.api.upload_file(
path_or_fileobj="dummy text data".encode("utf-8"),
path_in_repo="data/text_data.txt",
repo_id=self.repo_id,
-repo_type=self.repo_type,
+repo_type="dataset",
)
self.api.upload_file(
path_or_fileobj=b"dummy binary data",
path_in_repo="data/binary_data.bin",
repo_id=self.repo_id,
-repo_type=self.repo_type,
+repo_type="dataset",
)

def tearDown(self):
-self.api.delete_repo(self.repo_id, repo_type=self.repo_type)
+self.api.delete_repo(self.repo_id, repo_type="dataset")

def test_glob(self):
self.assertEqual(
Expand Down Expand Up @@ -141,7 +141,7 @@ def test_modified_time(self):

def test_initialize_from_fsspec(self):
fs, _, paths = fsspec.get_fs_token_paths(
-f"hf://{self.repo_type}s/{self.repo_id}/data/text_data.txt",
+f"hf://datasets/{self.repo_id}/data/text_data.txt",
storage_options={
"endpoint": ENDPOINT_STAGING,
"token": TOKEN,
Expand Down

0 comments on commit e0e2c16

Please sign in to comment.