Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 53 additions & 0 deletions hf-model-import-job/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Hugging Face Model Import Job

This script downloads a Hugging Face model from a given URL and logs it to TrueFoundry's model registry.

## Installation

1. Create a virtual environment (recommended):

```bash
python3 -m venv venv
source venv/bin/activate # On Windows: venv\Scripts\activate
```

2. Install the required dependencies:

```bash
pip install -r requirements.txt
```

3. Make sure you have TrueFoundry credentials configured (via `tfy login` or environment variables).

## Usage

### Basic Usage

```bash
# Make sure to activate your virtual environment first
source venv/bin/activate # On Windows: venv\Scripts\activate

python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation"
```

## Arguments

- `--model-id` (required): Hugging Face model ID (e.g., `microsoft/DialoGPT-medium`)
- `--ml-repo` (required): TrueFoundry ML repository name
- `--model-name` (required): Name for the model in TrueFoundry
- `--model-type` (required): Type of the model (e.g., 'text-generation', 'fill-mask')
- `--hf-token` (optional): Hugging Face token for private models

## Examples

### Import a popular language model:

```bash
python main.py --model-id "gpt2" --ml-repo "language-models" --model-name "gpt2-small" --model-type "text-generation"
```

### Import a BERT model:

```bash
python main.py --model-id "bert-base-uncased" --ml-repo "nlp-models" --model-name "bert-base" --model-type "fill-mask"
```
69 changes: 69 additions & 0 deletions hf-model-import-job/deploy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Deploy script: registers a manually-triggered TrueFoundry Job that runs
``main.py`` to import a Hugging Face model into a TrueFoundry ML repo.

Usage:
    python deploy.py --workspace_fqn <workspace-fqn>
"""
import logging
from truefoundry.deploy import (
    Param,
    Manual,
    Build,
    Resources,
    Job,
    PythonBuild,
    NodeSelector,  # NOTE(review): imported but unused — confirm and remove if not needed
    LocalSource,
)
import argparse

logging.basicConfig(level=logging.INFO)

# The only CLI input is the workspace to deploy the Job into.
parser = argparse.ArgumentParser()
parser.add_argument("--workspace_fqn", required=True, type=str)
args = parser.parse_args()

job = Job(
    name="hf-model-importer",
    image=Build(
        # build_source=LocalSource(local_build=False) deploys the code from your
        # local checkout, but builds the docker image remotely (on the cloud).
        # With local_build=True it would instead use the docker daemon on your
        # local machine to build the image.
        build_source=LocalSource(local_build=False),
        build_spec=PythonBuild(
            python_version="3.11",
            build_context_path="./hf-model-import-job",
            requirements_path="requirements.txt",
            # {{...}} placeholders are filled from the `params` declared below
            # when the job is triggered.
            # NOTE(review): main.py also accepts --hf-token for private models,
            # but it is not exposed as a param here — confirm whether private
            # model support is needed for this job.
            command="python main.py --model-id {{model_id}} --model-type {{model_type}} --ml-repo {{ml_repo}} --model-name {{model_name}}",
        ),
    ),
    # Manual trigger: the job only runs when started explicitly from the UI/API.
    trigger=Manual(),
    # User-supplied inputs collected at trigger time.
    params=[
        Param(
            name="model_id", description="Hugging face model ID", param_type="string"
        ),
        Param(
            name="model_type",
            description="model type from hugging face",
            default="text-generation",
            param_type="string",
        ),
        Param(
            name="ml_repo",
            description="ML repo name to import model to",
            # param_type="ml_repo" renders an ML-repo picker instead of free text.
            param_type="ml_repo",
        ),
        Param(
            name="model_name",
            description="Model name in truefoundry model registry",
            param_type="string",
        ),
    ],
    # Memory/storage values are in MB; ample ephemeral storage is requested
    # because the model snapshot is downloaded to local disk before upload.
    resources=Resources(
        cpu_request=1.0,
        cpu_limit=2.0,
        memory_request=2000,
        memory_limit=4000,
        ephemeral_storage_request=10000,
        ephemeral_storage_limit=20000,
    ),
    retries=0,
    # NOTE(review): workspace_fqn is passed both here and to job.deploy() below —
    # confirm Job accepts this kwarg, or drop one of the two.
    workspace_fqn=args.workspace_fqn,
)


# wait=False: return immediately instead of blocking until the deploy finishes.
job.deploy(workspace_fqn=args.workspace_fqn, wait=False)
97 changes: 97 additions & 0 deletions hf-model-import-job/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env python3
"""
Hugging Face Model Import Script for TrueFoundry

This script downloads a Hugging Face model from a given URL and logs it to TrueFoundry's model registry.
"""

import argparse
import os
import tempfile
import shutil

from huggingface_hub import snapshot_download
from truefoundry.ml import get_client, TransformersFramework

def main():
    """Parse CLI arguments, download a Hugging Face model snapshot, and log it
    to TrueFoundry's model registry.

    Side effects: downloads the model to a temporary directory under
    ``$BASE_DIR`` (default: current directory), uploads it via the TrueFoundry
    client, and prints the resulting model-version FQN. Errors from the
    download or upload propagate to the caller.
    """
    parser = argparse.ArgumentParser(
        description="Download a Hugging Face model and log it to TrueFoundry",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python main.py --model-id "microsoft/DialoGPT-medium" --ml-repo "my-ml-repo" --model-name "dialogpt-medium" --model-type "text-generation"
  python main.py --model-id "gpt2" --ml-repo "my-repo" --model-name "gpt2-model" --model-type "text-generation"
        """
    )

    parser.add_argument(
        "--model-id",
        required=True,
        help="Hugging Face model ID (e.g., 'microsoft/DialoGPT-medium')"
    )

    parser.add_argument(
        "--ml-repo",
        required=True,
        help="TrueFoundry ML repository name"
    )

    parser.add_argument(
        "--model-name",
        required=True,
        help="Name for the model in TrueFoundry"
    )

    parser.add_argument(
        "--model-type",
        required=True,
        help="Type of the model (e.g., 'text-generation')"
    )

    parser.add_argument(
        "--hf-token",
        required=False,
        help="Hugging Face token for private models "
             "(falls back to the HF_TOKEN environment variable)"
    )

    args = parser.parse_args()

    # Fall back to the conventional HF_TOKEN env var so the job can receive
    # credentials via environment instead of a CLI flag. An explicit
    # --hf-token still takes precedence; both absent -> None (public models).
    hf_token = args.hf_token or os.getenv("HF_TOKEN")

    # BASE_DIR lets the job place the (potentially large) download on a
    # volume with enough space instead of the default temp location.
    base_dir = os.getenv("BASE_DIR", ".")

    # TemporaryDirectory guarantees the downloaded snapshot is cleaned up
    # even if the upload fails.
    with tempfile.TemporaryDirectory(dir=base_dir) as temp_dir:
        print(f"Downloading model to {temp_dir}")

        snapshot_download(
            args.model_id,
            local_dir=temp_dir,
            # Skip TensorFlow (*.h5) and Rust (*.ot) weight files.
            ignore_patterns=["*.h5", "*.ot"],
            token=hf_token,
        )

        # snapshot_download leaves an internal .cache directory inside
        # local_dir; remove it so it is not uploaded as part of the model.
        cache_path = os.path.join(temp_dir, '.cache')
        if os.path.exists(cache_path):
            shutil.rmtree(cache_path)

        # Credentials come from `tfy login` or environment variables.
        client = get_client()
        model_version = client.log_model(
            ml_repo=args.ml_repo,
            name=args.model_name,
            model_file_or_folder=temp_dir,
            framework=TransformersFramework(
                model_id=args.model_id,
                pipeline_tag=args.model_type
            ),
        )

        print(f"\n✅ Success! Model logged to TrueFoundry with FQN: {model_version.fqn}")

if __name__ == "__main__":
    main()
4 changes: 4 additions & 0 deletions hf-model-import-job/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
truefoundry==0.11.12
huggingface_hub>=0.19.0
transformers>=4.30.0
torch>=2.0.0