From 69f87b57320519f03e59367d62a2846f7c516e20 Mon Sep 17 00:00:00 2001 From: Akash Gupta Date: Wed, 10 Sep 2025 21:44:57 +0530 Subject: [PATCH 1/5] add code for hf model importer --- hf-model-import-job/README.md | 53 +++++++++++++ hf-model-import-job/main.py | 109 +++++++++++++++++++++++++++ hf-model-import-job/requirements.txt | 4 + 3 files changed, 166 insertions(+) create mode 100644 hf-model-import-job/README.md create mode 100644 hf-model-import-job/main.py create mode 100644 hf-model-import-job/requirements.txt diff --git a/hf-model-import-job/README.md b/hf-model-import-job/README.md new file mode 100644 index 0000000..96199fb --- /dev/null +++ b/hf-model-import-job/README.md @@ -0,0 +1,53 @@ +# Hugging Face Model Import Job + +This script downloads a Hugging Face model by its model ID and logs it to TrueFoundry's model registry. + +## Installation + +1. Create a virtual environment (recommended): + +```bash +python3 -m venv venv +source venv/bin/activate # On Windows: venv\Scripts\activate +``` + +2. Install the required dependencies: + +```bash +pip install -r requirements.txt +``` + +3. Make sure you have TrueFoundry credentials configured (via `tfy login` or environment variables). 
+ +## Usage + +### Basic Usage + +```bash +# Make sure to activate your virtual environment first +source venv/bin/activate # On Windows: venv\Scripts\activate + +python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation" +``` + +## Arguments + +- `--model-id` (required): Hugging Face model ID or repository ID +- `--ml-repo` (required): TrueFoundry ML repository name +- `--model-name` (required): Name for the model in TrueFoundry +- `--model-type` (required): Type of the model (e.g., 'text-generation', 'fill-mask') +- `--hf-token` (optional): Hugging Face token for private models + +## Examples + +### Import a popular language model: + +```bash +python main.py --model-id "gpt2" --ml-repo "language-models" --model-name "gpt2-small" --model-type "text-generation" +``` + +### Import a BERT model: + +```bash +python main.py --model-id "bert-base-uncased" --ml-repo "nlp-models" --model-name "bert-base" --model-type "fill-mask" +``` diff --git a/hf-model-import-job/main.py b/hf-model-import-job/main.py new file mode 100644 index 0000000..9b170e4 --- /dev/null +++ b/hf-model-import-job/main.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +""" +Hugging Face Model Import Script for TrueFoundry + +This script downloads a Hugging Face model by its model ID and logs it to TrueFoundry's model registry. 
+""" + +import argparse +import os +import tempfile +import shutil + +from huggingface_hub import snapshot_download +from truefoundry.ml import get_client, TransformersFramework + +def main(): + """Main function to handle command line arguments and orchestrate the process.""" + parser = argparse.ArgumentParser( + description="Download a Hugging Face model and log it to TrueFoundry", + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +Examples: + python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation" + python main.py --model-id "gpt2" --ml-repo "my-repo" --model-name "gpt2-model" --model-type "text-generation" + """ + ) + + parser.add_argument( + "--model-id", + required=True, + help="Hugging Face model ID (e.g., 'microsoft/DialoGPT-medium')" + ) + + parser.add_argument( + "--ml-repo", + required=True, + help="TrueFoundry ML repository name" + ) + + parser.add_argument( + "--model-name", + required=True, + help="Name for the model in TrueFoundry" + ) + + parser.add_argument( + "--model-type", + required=True, + help="Type of the model (e.g., 'text-generation')" + ) + + parser.add_argument( + "--hf-token", + required=False, + help="Hugging Face token for private models" + ) + + args = parser.parse_args() + + # Create temporary directory for download + temp_dir = tempfile.mkdtemp() + model_download_path = temp_dir + + try: + + snapshot_download( + args.model_id, + revision=None, + cache_dir=None, + local_dir=model_download_path, + ignore_patterns=["*.h5", "*.ot"], + local_dir_use_symlinks=False, + token=args.hf_token, + ) + + if os.path.exists(os.path.join(model_download_path, '.cache')): + shutil.rmtree(os.path.join(model_download_path, '.cache')) + + + ML_REPO = args.ml_repo # ML Repo to upload to + MODEL_NAME = args.model_name # Model Name to upload as + + client = get_client() + model_version = client.log_model( + ml_repo=ML_REPO, + name=MODEL_NAME, + 
model_file_or_folder=model_download_path, + framework=TransformersFramework( + model_id=args.model_id, + pipeline_tag=args.model_type + ), + ) + + print(f"\n✅ Success! Model logged to TrueFoundry with FQN: {model_version.fqn}") + + # Clean up temporary files + print("Cleaning up temporary files...") + shutil.rmtree(temp_dir) + + except Exception as e: + print(f"\n❌ Error: {str(e)}") + shutil.rmtree(temp_dir) + return 1 + + return 0 + + +if __name__ == "__main__": + exit(main()) diff --git a/hf-model-import-job/requirements.txt b/hf-model-import-job/requirements.txt new file mode 100644 index 0000000..118e1a3 --- /dev/null +++ b/hf-model-import-job/requirements.txt @@ -0,0 +1,4 @@ +truefoundry==0.11.12 +huggingface_hub>=0.19.0 +transformers>=4.30.0 +torch>=2.0.0 \ No newline at end of file From 1010610f66492fbd4786b11e320acfe087e2d5ad Mon Sep 17 00:00:00 2001 From: Akash Gupta Date: Fri, 12 Sep 2025 19:21:15 +0530 Subject: [PATCH 2/5] update deploy file --- hf-model-import-job/deploy.py | 69 +++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 hf-model-import-job/deploy.py diff --git a/hf-model-import-job/deploy.py b/hf-model-import-job/deploy.py new file mode 100644 index 0000000..2c94689 --- /dev/null +++ b/hf-model-import-job/deploy.py @@ -0,0 +1,69 @@ +import logging +from truefoundry.deploy import ( + Param, + Manual, + Build, + Resources, + Job, + PythonBuild, + NodeSelector, + LocalSource, +) +import argparse + +logging.basicConfig(level=logging.INFO) + +parser = argparse.ArgumentParser() +parser.add_argument("--workspace_fqn", required=True, type=str) +args = parser.parse_args() + +job = Job( + name="hf-model-importer", + image=Build( + # Set build_source=LocalSource(local_build=False), in order to deploy code from your local. 
+ # With local_build=False flag, docker image will be built on cloud instead of local + # Else it will try to use docker installed on your local machine to build the image + build_source=LocalSource(local_build=False), + build_spec=PythonBuild( + python_version="3.11", + build_context_path="./hf-model-import-job", + requirements_path="requirements.txt", + command="python main.py --model-id {{model_id}} --model-type {{model_type}} --ml-repo {{ml_repo}} --model-name {{model_name}}", + ), + ), + trigger=Manual(), + params=[ + Param( + name="model_id", description="Hugging face model ID", param_type="string" + ), + Param( + name="model_type", + description="model type from hugging face", + default="text-generation", + param_type="string", + ), + Param( + name="ml_repo", + description="ML repo name to import model to", + param_type="ml_repo", + ), + Param( + name="model_name", + description="Model name in truefoundry model registry", + param_type="string", + ), + ], + resources=Resources( + cpu_request=1.0, + cpu_limit=2.0, + memory_request=2000, + memory_limit=4000, + ephemeral_storage_request=10000, + ephemeral_storage_limit=20000, + ), + retries=0, + workspace_fqn=args.workspace_fqn, +) + + +job.deploy(workspace_fqn=args.workspace_fqn, wait=False) From 2391f4eac820ed9143bb01c6d99e6ab9104777af Mon Sep 17 00:00:00 2001 From: Akash Gupta Date: Tue, 16 Sep 2025 14:52:21 +0530 Subject: [PATCH 3/5] add vol --- hf-model-import-job/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hf-model-import-job/main.py b/hf-model-import-job/main.py index 9b170e4..ffab34f 100644 --- a/hf-model-import-job/main.py +++ b/hf-model-import-job/main.py @@ -56,10 +56,14 @@ def main(): ) args = parser.parse_args() + + base_dir = os.getenv("BASE_DIR",".") # Create temporary directory for download - temp_dir = tempfile.mkdtemp() + temp_dir = tempfile.mkdtemp(dir=base_dir) model_download_path = temp_dir + + print(f"Downloading model to {model_download_path}") try: From 
172759a9a45ad830b63cbcc5454ee44246d9a27d Mon Sep 17 00:00:00 2001 From: Akash Gupta Date: Tue, 16 Sep 2025 17:01:15 +0530 Subject: [PATCH 4/5] fix --- hf-model-import-job/main.py | 25 +++++-------------------- 1 file changed, 5 insertions(+), 20 deletions(-) diff --git a/hf-model-import-job/main.py b/hf-model-import-job/main.py index ffab34f..791bd56 100644 --- a/hf-model-import-job/main.py +++ b/hf-model-import-job/main.py @@ -58,14 +58,11 @@ def main(): args = parser.parse_args() base_dir = os.getenv("BASE_DIR",".") - - # Create temporary directory for download - temp_dir = tempfile.mkdtemp(dir=base_dir) - model_download_path = temp_dir - print(f"Downloading model to {model_download_path}") - - try: + with tempfile.TemporaryDirectory(dir=base_dir) as temp_dir: + model_download_path = temp_dir + + print(f"Downloading model to {model_download_path}") snapshot_download( args.model_id, @@ -96,18 +93,6 @@ def main(): ) print(f"\n✅ Success! Model logged to TrueFoundry with FQN: {model_version.fqn}") - - # Clean up temporary files - print("Cleaning up temporary files...") - shutil.rmtree(temp_dir) - - except Exception as e: - print(f"\n❌ Error: {str(e)}") - shutil.rmtree(temp_dir) - return 1 - - return 0 - if __name__ == "__main__": - exit(main()) + main() From 989e5a5174e16cd3ea427831b794fa6ed6f51685 Mon Sep 17 00:00:00 2001 From: Akash Gupta Date: Tue, 16 Sep 2025 17:40:14 +0530 Subject: [PATCH 5/5] fix --- hf-model-import-job/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hf-model-import-job/main.py b/hf-model-import-job/main.py index 791bd56..93acfad 100644 --- a/hf-model-import-job/main.py +++ b/hf-model-import-job/main.py @@ -57,7 +57,7 @@ def main(): args = parser.parse_args() - base_dir = os.getenv("BASE_DIR",".") + base_dir = os.getenv("BASE_DIR", ".") with tempfile.TemporaryDirectory(dir=base_dir) as temp_dir: model_download_path = temp_dir @@ -70,7 +70,6 @@ def main(): cache_dir=None, local_dir=model_download_path, 
ignore_patterns=["*.h5", "*.ot"], - local_dir_use_symlinks=False, token=args.hf_token, )