Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Hugging Face Hub integration #728

Closed
wants to merge 2 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 118 additions & 1 deletion GANDLF/utils/modelio.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import hashlib
import os
import subprocess
from typing import Any, Dict
from typing import Any, Dict, Optional

import torch

Expand Down Expand Up @@ -136,6 +136,7 @@
params: Dict[str, Any],
path: str,
onnx_export: bool = True,
hf_hub_repo_id: Optional[str] = None,
):
"""
Save the model dictionary to a file.
Expand All @@ -146,6 +147,7 @@
params (dict): The parameter dictionary.
path (str): The path to save the model dictionary to.
onnx_export (bool): Whether to export to ONNX and OpenVINO.
hf_hub_repo_id (str): The Hugging Face Hub repo id to push to. Defaults to None (will not push to HF Hub).
"""
model_dict["timestamp"] = get_unique_timestamp()
model_dict["timestamp_hash"] = hashlib.sha256(
Expand All @@ -169,6 +171,9 @@
# post-training optimization
optimize_and_save_model(model, params, path, onnx_export=onnx_export)

if hf_hub_repo_id is not None:
push_model_to_hf_hub(model_path=path, repo_path=path, repo_id=hf_hub_repo_id)

Check warning on line 175 in GANDLF/utils/modelio.py

View check run for this annotation

Codecov / codecov/patch

GANDLF/utils/modelio.py#L175

Added line #L175 was not covered by tests
#TODO: also push optimized models?

def load_model(
path: str, device: torch.device, full_sanity_check: bool = True
Expand Down Expand Up @@ -242,3 +247,115 @@
output_layer = compiled_model.outputs

return compiled_model, input_layer, output_layer


def load_model_from_hf_hub(
    repo_id: str,
    model_filename: str,
    revision: Optional[str] = None,
    local_dir: Optional[str] = None,
    device: str = "CPU",
) -> Dict[str, Any]:
    """
    Download a model file from the Hugging Face Hub and load it with `load_model`.

    If the repo is private, credentials must be set beforehand.

    Args:
        repo_id (str): The Hugging Face Hub repo id.
        model_filename (str): The model filename in the repo.
        revision (Optional[str]): The revision of the model to load. Defaults to
            None, which lets the Hub client pick its default revision.
        local_dir (Optional[str]): The local directory to download the model to. Defaults to None.
        device (str): Forwarded unchanged to `load_model` as its device argument.
            NOTE(review): `load_model` is annotated as taking a `torch.device`;
            confirm that the default "CPU" string is accepted there.

    Returns:
        The value returned by `load_model` for the downloaded file.

    Raises:
        ImportError: If the optional `huggingface_hub` package is not installed.
    """
    # huggingface_hub is an optional dependency, so it is imported lazily.
    # Fail loudly here: continuing without the module would only surface as an
    # unrelated unbound-name error on the download call below.
    try:
        import huggingface_hub
    except ImportError as err:
        raise ImportError(
            "huggingface_hub is not present; install it to load models from the Hugging Face Hub."
        ) from err

    local_model_path = huggingface_hub.hf_hub_download(
        repo_id=repo_id,
        filename=model_filename,
        revision=revision,
        local_dir=local_dir,
    )

    return load_model(local_model_path, device)

Check warning on line 285 in GANDLF/utils/modelio.py

View check run for this annotation

Codecov / codecov/patch

GANDLF/utils/modelio.py#L285

Added line #L285 was not covered by tests


def upload_model_repo_to_hf_hub(
    repo_id: str,
    local_path: str,
    path_in_repo: str,
    model_card_path: Optional[str] = None,
    private: Optional[bool] = False,
    upload_onnx: Optional[bool] = False,
    upload_ov: Optional[bool] = False,
) -> str:
    """
    Upload model to repo on Hugging Face Hub. Must be logged in to Hugging Face Hub.

    Args:
        repo_id (str): The Hugging Face Hub repo id to upload to. Will create if does not already exist.
        local_path (str): The path to the model to upload.
        path_in_repo (str): The path to the model in the repo.
        model_card_path (Optional[str]): The path to the model card to upload. Defaults to None.
            Currently unused (model card handling is not implemented yet).
        private (Optional[bool]): Whether to make the repo private. Defaults to False.
        upload_onnx (Optional[bool]): Whether to upload the ONNX model. Defaults to False.
            Currently unused.
        upload_ov (Optional[bool]): Whether to upload the OpenVINO model. Defaults to False.
            Currently unused.

    Returns:
        str: The value returned by `HfApi.upload_file` for the uploaded model file.
            NOTE(review): confirm whether callers expect the commit URL or a revision id.

    Raises:
        ImportError: If the optional `huggingface_hub` package is not installed.
    """
    # huggingface_hub is an optional dependency, so it is imported lazily.
    # Fail loudly here instead of printing and hitting an unbound name below.
    try:
        import huggingface_hub
    except ImportError as err:
        raise ImportError(
            "huggingface_hub is not present; install it to upload models to the Hugging Face Hub."
        ) from err

    # exist_ok=True makes this "create if missing": without it, create_repo
    # raises when the repo already exists and repeat uploads would fail.
    huggingface_hub.create_repo(repo_id=repo_id, private=private, exist_ok=True)

    api = huggingface_hub.HfApi()

    upload_result = api.upload_file(
        path_or_file_obj=local_path,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
    )

    # TODO: upload the model card at `model_card_path` when given, otherwise create a new one
    # TODO: upload optimized models?

    return upload_result


def push_model_to_hf_hub(
    repo_id: str,
    local_path: str,
    path_in_repo: str,
) -> str:
    """
    Push model to repo on Hugging Face Hub. Must be logged in to Hugging Face Hub.

    Args:
        repo_id (str): The Hugging Face Hub repo id to push to.
        local_path (str): The local path to the model to push.
        path_in_repo (str): The path to the model in the repo.

    Returns:
        str: The URL to visualize the uploaded file on the hub.

    Raises:
        ImportError: If the optional `huggingface_hub` package is not installed.
    """
    # huggingface_hub is an optional dependency, so it is imported lazily.
    # Fail loudly here instead of printing and hitting an unbound name below.
    try:
        import huggingface_hub
    except ImportError as err:
        raise ImportError(
            "huggingface_hub is not present; install it to push models to the Hugging Face Hub."
        ) from err

    api = huggingface_hub.HfApi()

    return api.upload_file(
        path_or_file_obj=local_path,
        path_in_repo=path_in_repo,
        repo_id=repo_id,
    )
Loading