Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions hf-model-import-job/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Hugging Face Model Import Job

This script downloads a Hugging Face model from a given URL and logs it to TrueFoundry's model registry.

## Installation

1. Create a virtual environment (recommended):

```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```

2. Install the required dependencies:

```bash
pip install -r requirements.txt
```

3. Make sure you have TrueFoundry credentials configured (via `tfy login` or environment variables).

## Usage

### Basic Usage

```bash
# Make sure to activate your virtual environment first
source venv/bin/activate # On Windows: venv\Scripts\activate

python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation"
```

## Arguments

- `--model-id` (required): Hugging Face model ID (e.g., `microsoft/DialoGPT-medium`)
- `--ml-repo` (required): TrueFoundry ML repository name
- `--model-name` (required): Name for the model in TrueFoundry
- `--model-type` (required): Type of the model (e.g., 'text-generation', 'fill-mask')
- `--hf-token` (optional): Hugging Face token for private models

## Examples

### Import a popular language model:

```bash
python main.py --model-id "gpt2" --ml-repo "language-models" --model-name "gpt2-small" --model-type "text-generation"
```

### Import a BERT model:

```bash
python main.py --model-id "bert-base-uncased" --ml-repo "nlp-models" --model-name "bert-base" --model-type "fill-mask"
```
69 changes: 69 additions & 0 deletions hf-model-import-job/deploy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Deploy script: registers a manually-triggered TrueFoundry Job that runs
``main.py`` to import a Hugging Face model into a TrueFoundry ML repo.

Usage:
    python deploy.py --workspace_fqn <workspace-fqn>
"""
import logging
from truefoundry.deploy import (
    Param,
    Manual,
    Build,
    Resources,
    Job,
    PythonBuild,
    NodeSelector,  # NOTE(review): imported but unused — confirm and remove if not needed
    LocalSource,
)
import argparse

logging.basicConfig(level=logging.INFO)

# The only CLI input is the workspace to deploy the Job into.
parser = argparse.ArgumentParser()
parser.add_argument("--workspace_fqn", required=True, type=str)
args = parser.parse_args()

job = Job(
    name="hf-model-importer",
    image=Build(
        # build_source=LocalSource(local_build=False) deploys the code from your
        # local checkout, but builds the docker image remotely (on the cloud).
        # With local_build=True it would instead use the docker daemon on your
        # local machine to build the image.
        build_source=LocalSource(local_build=False),
        build_spec=PythonBuild(
            python_version="3.11",
            build_context_path="./hf-model-import-job",
            requirements_path="requirements.txt",
            # {{...}} placeholders are filled from the `params` declared below
            # when the job is triggered.
            # NOTE(review): main.py also accepts --hf-token for private models,
            # but it is not exposed as a param here — confirm whether private
            # model support is needed for this job.
            command="python main.py --model-id {{model_id}} --model-type {{model_type}} --ml-repo {{ml_repo}} --model-name {{model_name}}",
        ),
    ),
    # Manual trigger: the job only runs when started explicitly from the UI/API.
    trigger=Manual(),
    # User-supplied inputs collected at trigger time.
    params=[
        Param(
            name="model_id", description="Hugging face model ID", param_type="string"
        ),
        Param(
            name="model_type",
            description="model type from hugging face",
            default="text-generation",
            param_type="string",
        ),
        Param(
            name="ml_repo",
            description="ML repo name to import model to",
            # param_type="ml_repo" renders an ML-repo picker instead of free text.
            param_type="ml_repo",
        ),
        Param(
            name="model_name",
            description="Model name in truefoundry model registry",
            param_type="string",
        ),
    ],
    # Memory/storage values are in MB; ample ephemeral storage is requested
    # because the model snapshot is downloaded to local disk before upload.
    resources=Resources(
        cpu_request=1.0,
        cpu_limit=2.0,
        memory_request=2000,
        memory_limit=4000,
        ephemeral_storage_request=10000,
        ephemeral_storage_limit=20000,
    ),
    retries=0,
    # NOTE(review): workspace_fqn is passed both here and to job.deploy() below —
    # confirm Job accepts this kwarg, or drop one of the two.
    workspace_fqn=args.workspace_fqn,
)


# wait=False: return immediately instead of blocking until the deploy finishes.
job.deploy(workspace_fqn=args.workspace_fqn, wait=False)
97 changes: 97 additions & 0 deletions hf-model-import-job/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""
Hugging Face Model Import Script for TrueFoundry

This script downloads a Hugging Face model from a given URL and logs it to TrueFoundry's model registry.
"""

import argparse
import os
import tempfile
import shutil

from huggingface_hub import snapshot_download
from truefoundry.ml import get_client, TransformersFramework

def main():
    """Parse CLI arguments, download a Hugging Face model snapshot, and log it
    to TrueFoundry's model registry.

    Side effects: downloads the model to a temporary directory under
    ``$BASE_DIR`` (default: current directory), uploads it via the TrueFoundry
    client, and prints the resulting model-version FQN. Errors from the
    download or upload propagate to the caller.
    """
    parser = argparse.ArgumentParser(
        description="Download a Hugging Face model and log it to TrueFoundry",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation"
  python main.py --model-id "gpt2" --ml-repo "my-repo" --model-name "gpt2-model" --model-type "text-generation"
        """
    )

    parser.add_argument(
        "--model-id",
        required=True,
        help="Hugging Face model ID (e.g., 'microsoft/DialoGPT-medium')"
    )

    parser.add_argument(
        "--ml-repo",
        required=True,
        help="TrueFoundry ML repository name"
    )

    parser.add_argument(
        "--model-name",
        required=True,
        help="Name for the model in TrueFoundry"
    )

    parser.add_argument(
        "--model-type",
        required=True,
        help="Type of the model (e.g., 'text-generation')"
    )

    parser.add_argument(
        "--hf-token",
        required=False,
        help="Hugging Face token for private models "
             "(falls back to the HF_TOKEN environment variable)"
    )

    args = parser.parse_args()

    # Fall back to the conventional HF_TOKEN env var so the job can receive
    # credentials via environment instead of a CLI flag. An explicit
    # --hf-token still takes precedence; both absent -> None (public models).
    hf_token = args.hf_token or os.getenv("HF_TOKEN")

    # BASE_DIR lets the job place the (potentially large) download on a
    # volume with enough space instead of the default temp location.
    base_dir = os.getenv("BASE_DIR", ".")

    # TemporaryDirectory guarantees the downloaded snapshot is cleaned up
    # even if the upload fails.
    with tempfile.TemporaryDirectory(dir=base_dir) as temp_dir:
        print(f"Downloading model to {temp_dir}")

        snapshot_download(
            args.model_id,
            local_dir=temp_dir,
            # Skip TensorFlow (*.h5) and Rust (*.ot) weight files.
            ignore_patterns=["*.h5", "*.ot"],
            token=hf_token,
        )

        # snapshot_download leaves an internal .cache directory inside
        # local_dir; remove it so it is not uploaded as part of the model.
        cache_path = os.path.join(temp_dir, '.cache')
        if os.path.exists(cache_path):
            shutil.rmtree(cache_path)

        # Credentials come from `tfy login` or environment variables.
        client = get_client()
        model_version = client.log_model(
            ml_repo=args.ml_repo,
            name=args.model_name,
            model_file_or_folder=temp_dir,
            framework=TransformersFramework(
                model_id=args.model_id,
                pipeline_tag=args.model_type
            ),
        )

        print(f"\n✅ Success! Model logged to TrueFoundry with FQN: {model_version.fqn}")

if __name__ == "__main__":
    main()
4 changes: 4 additions & 0 deletions hf-model-import-job/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
truefoundry==0.11.12
huggingface_hub>=0.19.0
transformers>=4.30.0
torch>=2.0.0