diff --git a/Dockerfile b/Dockerfile
index 2c6a552..1344164 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -21,45 +21,63 @@ COPY requirements.txt /app/
 RUN pip install --no-cache-dir --upgrade pip && \
     pip install --no-cache-dir -r requirements.txt
 
+# ============================================================================
+# USE CASE 1: BAKE MODEL INTO IMAGE
+# ============================================================================
+# Pre-download and cache the model in the image
+# Using DistilBERT for sentiment classification - small and efficient
+ENV HF_HOME=/app/models
+ENV HF_HUB_ENABLE_HF_TRANSFER=0
+
+# MODEL BAKING OPTION 1: Automatic via transformers (DEFAULT)
+# Pros: Simple, clean, automatic caching
+# Cons: Requires network during build
+RUN python -c "from transformers import pipeline; pipeline('sentiment-analysis', model='distilbert-base-uncased-finetuned-sst-2-english')"
+
+# MODEL BAKING OPTION 2: Manual via wget (Alternative)
+# Pros: Explicit control, works with custom/hosted models, offline-friendly
+# Cons: Need to manually list all model files
+# To use: Uncomment below and disable MODEL BAKING OPTION 1 above
+# Required files: config.json, model.safetensors, tokenizer_config.json, vocab.txt
+# RUN mkdir -p /app/models/distilbert-model && \
+#     cd /app/models/distilbert-model && \
+#     wget -q https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/config.json && \
+#     wget -q https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/model.safetensors && \
+#     wget -q https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/tokenizer_config.json && \
+#     wget -q https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english/resolve/main/vocab.txt
+
 # Copy application files
 COPY . /app
 
 # ============================================================================
-# OPTION 1: Keep everything from base image (Jupyter, SSH, entrypoint) - DEFAULT
+# USE CASE 2: SERVICE STARTUP & ENTRYPOINT
 # ============================================================================
-# The base image already provides everything:
-# - Entrypoint: /opt/nvidia/nvidia_entrypoint.sh (handles CUDA setup)
-# - Default CMD: /start.sh (starts Jupyter/SSH automatically based on template settings)
-# - Jupyter Notebook (starts if startJupyter=true in template)
-# - SSH access (starts if startSsh=true in template)
-#
+# Choose how the container starts and what services run
+
+# STARTUP OPTION 1: Keep everything from base image (DEFAULT - Jupyter + SSH)
+# Use this for: Interactive development, remote access, Jupyter notebook
+# Behavior:
+#   - Entrypoint: /opt/nvidia/nvidia_entrypoint.sh (CUDA setup)
+#   - CMD: /start.sh (starts Jupyter/SSH based on template settings)
 # Just don't override CMD - the base image handles everything!
 # CMD is not set, so base image default (/start.sh) is used
 
-# ============================================================================
-# OPTION 2: Override CMD but keep entrypoint and services
-# ============================================================================
-# If you want to run your own command but still have Jupyter/SSH start:
-# - Keep the entrypoint (CUDA setup still happens automatically)
-# - Use the provided run.sh script which starts /start.sh in background,
-#   then runs your application commands
-#
-# Edit run.sh to customize what runs after services start, then uncomment:
+# STARTUP OPTION 2: Run app after services (Jupyter + SSH + custom app)
+# Use this for: Keeping Jupyter/SSH running while your application runs alongside them
+# Behavior:
+#   - Entrypoint: /opt/nvidia/nvidia_entrypoint.sh (CUDA setup)
+#   - CMD: Runs run.sh, which starts /start.sh in the background, then your app
+# To use: Uncomment below
 # COPY run.sh /app/run.sh
 # RUN chmod +x /app/run.sh
 # CMD ["/app/run.sh"]
-#
-# The run.sh script:
-# 1. Starts /start.sh in background (starts Jupyter/SSH)
-# 2. Waits for services to initialize
-# 3. Runs your application commands
-# 4. Waits for background processes
 
-# ============================================================================
-# OPTION 3: Override everything - no Jupyter, no SSH, just your app
-# ============================================================================
-# If you don't want any base image services, override both entrypoint and CMD:
-#
-# ENTRYPOINT []          # Clear entrypoint
+# STARTUP OPTION 3: Application only (no Jupyter, no SSH)
+# Use this for: Production serverless, minimal overhead, just your app
+# Behavior:
+#   - No Jupyter, no SSH, minimal services
+#   - Direct app execution
+# To use: Uncomment below
+# ENTRYPOINT []
 # CMD ["python", "/app/main.py"]
diff --git a/docs/context.md b/docs/context.md
index 2ba1f83..d898478 100644
--- a/docs/context.md
+++ b/docs/context.md
@@ -111,6 +111,34 @@ The Dockerfiles demonstrate three approaches for handling the base image's entry
 - `PYTHONUNBUFFERED=1` ensures Python output is immediately visible in logs
 - The base image entrypoint (`/opt/nvidia/nvidia_entrypoint.sh`) handles CUDA initialization
 
+## Pre-Baked Model
+
+This template includes a pre-downloaded DistilBERT sentiment classification model baked into the Docker image:
+
+- **Model**: `distilbert-base-uncased-finetuned-sst-2-english`
+- **Task**: Sentiment analysis (POSITIVE/NEGATIVE classification)
+- **Size**: ~268MB (small and efficient)
+- **Input**: Plain text strings
+- **Location**: Cached in `/app/models/` within the image
+- **Usage**: Load with `pipeline('sentiment-analysis', model=...)` in Python
+
+The model runs on GPU if available (via CUDA) or falls back to CPU. See `main.py` for example inference code.
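+
+A minimal loading sketch (mirroring the default path in `main.py`; the example sentence is purely illustrative):
+
+```python
+import torch
+from transformers import pipeline
+
+# Model files are already cached under HF_HOME (/app/models) in the image,
+# so local_files_only avoids any network access at runtime.
+classifier = pipeline(
+    "sentiment-analysis",
+    model="distilbert-base-uncased-finetuned-sst-2-english",
+    device=0 if torch.cuda.is_available() else -1,
+    model_kwargs={"local_files_only": True},
+)
+
+print(classifier("Baking the model into the image keeps cold starts fast."))
+# -> list of dicts like [{'label': ..., 'score': ...}]
+```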
+
+### Model Download Methods
+
+**Option A: Automatic (Transformers Pipeline)**
+- Downloads via the `transformers` library during build
+- Model cached automatically in the `HF_HOME` directory
+- Requires network access during build
+- See the commented "MODEL BAKING OPTION 1" block in the Dockerfile
+
+**Option B: Manual (wget)**
+- Downloads specific model files directly via `wget`
+- Useful for custom/hosted models or when you need explicit control
+- Set `HF_HOME` to point to the downloaded directory
+- See the commented "MODEL BAKING OPTION 2" block in the Dockerfile for example wget commands
+- To use: Uncomment the RUN commands in the Dockerfile and update `main.py` to load from the local path
+
 ## Customization Points
 
 - **Base Image**: Change `FROM` line to use other Runpod base images
@@ -118,4 +146,5 @@ The Dockerfiles demonstrate three approaches for handling the base image's entry
 - **Python Dependencies**: Update `requirements.txt`
 - **Application Code**: Replace or extend `main.py`
 - **Entry Point**: Modify `CMD` in Dockerfile
+- **Model Selection**: Replace the model ID in the Dockerfile and `main.py` to use a different Hugging Face model
diff --git a/main.py b/main.py
index 990157d..7160123 100644
--- a/main.py
+++ b/main.py
@@ -1,34 +1,76 @@
 """
-Example template application.
-This demonstrates how to extend a Runpod PyTorch base image.
+Example template application with DistilBERT sentiment classification model.
+This demonstrates how to extend a Runpod PyTorch base image and use a baked-in model.
 """
 import sys
 import torch
 import time
 import signal
+from transformers import pipeline
+
+
 def main():
     print("Hello from your Runpod template!")
     print(f"Python version: {sys.version.split()[0]}")
     print(f"PyTorch version: {torch.__version__}")
     print(f"CUDA available: {torch.cuda.is_available()}")
-
+
     if torch.cuda.is_available():
         print(f"CUDA version: {torch.version.cuda}")
         print(f"GPU device: {torch.cuda.get_device_name(0)}")
-
-    print("\nContainer is running. Add your application logic here.")
-    print("Press Ctrl+C to stop.")
-
+
+    # Initialize the sentiment analysis model (already cached in the image)
+    print("\nLoading sentiment analysis model...")
+    device = 0 if torch.cuda.is_available() else -1
+
+    # ========================================================================
+    # USE CASE 1: LOAD MODEL
+    # ========================================================================
+
+    # MODEL LOADING OPTION 1: From Hugging Face Hub cache (DEFAULT)
+    # Use this when: Using the transformers pipeline for model baking
+    # Behavior: Loads from cache, requires local_files_only=True
+    classifier = pipeline(
+        "sentiment-analysis",
+        model="distilbert-base-uncased-finetuned-sst-2-english",
+        device=device,
+        model_kwargs={"local_files_only": True},
+    )
+
+    # MODEL LOADING OPTION 2: From local directory (Alternative)
+    # Use this when: Using wget for model baking (uncomment in Dockerfile)
+    # Behavior: Loads directly from /app/models/distilbert-model
+    # To use: Uncomment below and disable MODEL LOADING OPTION 1
+    # classifier = pipeline('sentiment-analysis',
+    #                       model='/app/models/distilbert-model',
+    #                       device=device)
+
+    print("Model loaded successfully!")
+
+    # Example inference
+    test_texts = [
+        "This is a wonderful experience!",
+        "I really don't like this at all.",
+        "The weather is nice today.",
+    ]
+
+    print("\n--- Running sentiment analysis ---")
+    for text in test_texts:
+        result = classifier(text)
+        print(f"Text: {text}")
+        print(f"Result: {result[0]['label']} (confidence: {result[0]['score']:.4f})\n")
+
+    print("Container is running. Press Ctrl+C to stop.")
+
     # Keep container running
     def signal_handler(sig, frame):
         print("\nShutting down...")
         sys.exit(0)
-
+
     signal.signal(signal.SIGINT, signal_handler)
     signal.signal(signal.SIGTERM, signal_handler)
-
+
     # Keep running until terminated
     try:
         while True:
@@ -36,6 +78,6 @@ def signal_handler(sig, frame):
     except KeyboardInterrupt:
         signal_handler(None, None)
 
+
 if __name__ == "__main__":
     main()
-
diff --git a/requirements.txt b/requirements.txt
index 21ef1db..b860b59 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,4 +2,5 @@
 # Add your packages here
 numpy>=1.24.0
 requests>=2.31.0
+transformers>=4.40.0