feat: RND-114: Add SAM2 integration for Video Object Tracking
nik committed Aug 7, 2024
1 parent e97995e commit 0d4d299
Showing 10 changed files with 678 additions and 0 deletions.
59 changes: 59 additions & 0 deletions label_studio_ml/examples/segment_anything_2_video/Dockerfile
@@ -0,0 +1,59 @@
FROM pytorch/pytorch:2.1.2-cuda12.1-cudnn8-runtime
ARG DEBIAN_FRONTEND=noninteractive
ARG TEST_ENV

WORKDIR /app

RUN conda update conda -y

RUN --mount=type=cache,target="/var/cache/apt",sharing=locked \
--mount=type=cache,target="/var/lib/apt/lists",sharing=locked \
apt-get -y update \
&& apt-get install -y git \
&& apt-get install -y wget \
&& apt-get install -y g++ freeglut3-dev build-essential libx11-dev \
libxmu-dev libxi-dev libglu1-mesa libglu1-mesa-dev libfreeimage-dev \
&& apt-get -y install ffmpeg libsm6 libxext6 libffi-dev python3-dev python3-pip gcc

ENV PYTHONUNBUFFERED=1 \
PYTHONDONTWRITEBYTECODE=1 \
PIP_CACHE_DIR=/.cache \
PORT=9090 \
WORKERS=2 \
THREADS=4 \
CUDA_HOME=/usr/local/cuda \
SEGMENT_ANYTHING_2_REPO_PATH=/segment-anything-2

RUN conda install -c "nvidia/label/cuda-12.1.1" cuda -y
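# the CUDA toolkit installed by conda above lands in the conda prefix, so CUDA_HOME is re-pointed to /opt/conda below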
ENV CUDA_HOME=/opt/conda \
TORCH_CUDA_ARCH_LIST="6.0;6.1;7.0;7.5;8.0;8.6+PTX;8.9;9.0"

# install base requirements
COPY requirements-base.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip install -r requirements-base.txt

COPY requirements.txt .
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip3 install -r requirements.txt

# install segment-anything-2
RUN cd / && git clone --depth 1 --branch main --single-branch https://github.com/facebookresearch/segment-anything-2.git
WORKDIR /segment-anything-2
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
pip3 install -e .
RUN cd checkpoints && ./download_ckpts.sh

WORKDIR /app

# install test requirements if needed
COPY requirements-test.txt .
# build only when TEST_ENV="true"
RUN --mount=type=cache,target=${PIP_CACHE_DIR},sharing=locked \
if [ "$TEST_ENV" = "true" ]; then \
pip3 install -r requirements-test.txt; \
fi

COPY . ./

CMD ["/app/start.sh"]
66 changes: 66 additions & 0 deletions label_studio_ml/examples/segment_anything_2_video/README.md
@@ -0,0 +1,66 @@
This guide describes the simplest way to start using **SegmentAnything 2** with Label Studio for video object tracking.

## Using SAM2 with Label Studio (tutorial)
[![Connecting SAM2 Model to Label Studio for Image Annotation ](https://img.youtube.com/vi/FTg8P8z4RgY/0.jpg)](https://www.youtube.com/watch?v=FTg8P8z4RgY)

Note that as of 8/1/2024, SAM2 only runs on GPU.

## Running from source

1. To run the ML backend without Docker, clone the repository and install all dependencies using pip:

```bash
git clone https://github.com/HumanSignal/label-studio-ml-backend.git
cd label-studio-ml-backend
pip install -e .
cd label_studio_ml/examples/segment_anything_2_video
pip install -r requirements.txt
```

2. Download the [`segment-anything-2` repo](https://github.com/facebookresearch/segment-anything-2) into the root directory. Install the SegmentAnything 2 package and download the checkpoints following [the official Meta documentation](https://github.com/facebookresearch/segment-anything-2?tab=readme-ov-file#installation).
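
For reference, a minimal sketch of this step that mirrors what the Dockerfile in this example does (the clone location and paths are illustrative; defer to the official instructions if they differ):

```bash
# clone the SAM2 repository (the Dockerfile clones it into /segment-anything-2)
git clone --depth 1 --branch main --single-branch https://github.com/facebookresearch/segment-anything-2.git
cd segment-anything-2

# install the package in editable mode
pip install -e .

# fetch the SAM2 checkpoints bundled with the repo
cd checkpoints && ./download_ckpts.sh
cd ../..
```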


3. Then you can start the ML backend on the default port `9090`:

```bash
cd ../
label-studio-ml start ./segment_anything_2_video
```

4. Connect the running ML backend server to Label Studio: go to your project `Settings -> Machine Learning -> Add Model` and specify `http://localhost:9090` as the URL. Read more in the official [Label Studio documentation](https://labelstud.io/guide/ml#Connect-the-model-to-Label-Studio).
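
The connection can also be scripted against the Label Studio REST API; something like the following should work (the project ID `1` is a placeholder, and `LABEL_STUDIO_URL` / `LABEL_STUDIO_API_KEY` follow the variable names used in `docker-compose.yml`):

```bash
curl -X POST "$LABEL_STUDIO_URL/api/ml" \
  -H "Authorization: Token $LABEL_STUDIO_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"url": "http://localhost:9090", "project": 1}'
```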

## Running with Docker (coming soon)

1. Start the Machine Learning backend on `http://localhost:9090` with the prebuilt image:

```bash
docker-compose up
```

2. Validate that the backend is running:

```bash
$ curl http://localhost:9090/
{"status":"UP"}
```

3. Connect to the backend from Label Studio running on the same host: go to your project `Settings -> Machine Learning -> Add Model` and specify `http://localhost:9090` as the URL.


# Configuration
Parameters can be set in `docker-compose.yml` before running the container (see the example after the parameter list).


The following common parameters are available:
- `DEVICE` - specify the device for the model server (currently only `cuda` is supported, `cpu` is coming soon)
- `MODEL_CONFIG` - SAM2 model configuration file (`sam2_hiera_l.yaml` by default)
- `MODEL_CHECKPOINT` - SAM2 model checkpoint file (`sam2_hiera_large.pt` by default)
- `BASIC_AUTH_USER` - specify the basic auth user for the model server
- `BASIC_AUTH_PASS` - specify the basic auth password for the model server
- `LOG_LEVEL` - set the log level for the model server
- `WORKERS` - specify the number of workers for the model server
- `THREADS` - specify the number of threads for the model server
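
For example, when running from source the same settings can be exported in the shell before starting the backend. This assumes the model reads them from the environment, as the `docker-compose.yml` defaults suggest; the values shown are just the defaults:

```bash
export DEVICE=cuda
export MODEL_CONFIG=sam2_hiera_l.yaml
export MODEL_CHECKPOINT=sam2_hiera_large.pt
export LOG_LEVEL=DEBUG

label-studio-ml start ./segment_anything_2_video
```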

# Customization

The ML backend can be customized by adding your own models and logic inside the `./segment_anything_2_video` directory (a minimal skeleton is sketched below).
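
The hypothetical `model.py` below only illustrates the shape of the `NewModel` class that `_wsgi.py` imports; the real implementation in this example wraps the SAM2 video predictor, and the base-class API and return types may differ between `label-studio-ml` versions.

```python
# model.py -- hypothetical minimal skeleton, not the implementation shipped in this example
from typing import Dict, List, Optional

from label_studio_ml.model import LabelStudioMLBase


class NewModel(LabelStudioMLBase):
    """Stub backend that returns an empty prediction for every task."""

    def setup(self):
        # called when the model is initialized; a real backend would load the SAM2 predictor here
        self.set("model_version", "sam2-video-stub")

    def predict(self, tasks: List[Dict], context: Optional[Dict] = None, **kwargs) -> List[Dict]:
        # `tasks` holds the Label Studio tasks to annotate; `context` carries
        # interactive hints (e.g. user keypoints) when prediction is triggered from the UI
        return [
            {"result": [], "score": 0.0, "model_version": self.get("model_version")}
            for _ in tasks
        ]
```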
121 changes: 121 additions & 0 deletions label_studio_ml/examples/segment_anything_2_video/_wsgi.py
@@ -0,0 +1,121 @@
import os
import argparse
import json
import logging
import logging.config

logging.config.dictConfig({
"version": 1,
"formatters": {
"standard": {
"format": "[%(asctime)s] [%(levelname)s] [%(name)s::%(funcName)s::%(lineno)d] %(message)s"
}
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"level": os.getenv('LOG_LEVEL'),
"stream": "ext://sys.stdout",
"formatter": "standard"
}
},
"root": {
"level": os.getenv('LOG_LEVEL'),
"handlers": [
"console"
],
"propagate": True
}
})

from label_studio_ml.api import init_app
from model import NewModel


_DEFAULT_CONFIG_PATH = os.path.join(os.path.dirname(__file__), 'config.json')


def get_kwargs_from_config(config_path=_DEFAULT_CONFIG_PATH):
if not os.path.exists(config_path):
return dict()
with open(config_path) as f:
config = json.load(f)
assert isinstance(config, dict)
return config


if __name__ == "__main__":
parser = argparse.ArgumentParser(description='Label studio')
parser.add_argument(
'-p', '--port', dest='port', type=int, default=9090,
help='Server port')
parser.add_argument(
'--host', dest='host', type=str, default='0.0.0.0',
help='Server host')
parser.add_argument(
'--kwargs', '--with', dest='kwargs', metavar='KEY=VAL', nargs='+', type=lambda kv: kv.split('='),
help='Additional LabelStudioMLBase model initialization kwargs')
parser.add_argument(
'-d', '--debug', dest='debug', action='store_true',
help='Switch debug mode')
parser.add_argument(
'--log-level', dest='log_level', choices=['DEBUG', 'INFO', 'WARNING', 'ERROR'], default=None,
help='Logging level')
parser.add_argument(
'--model-dir', dest='model_dir', default=os.path.dirname(__file__),
help='Directory where models are stored (relative to the project directory)')
parser.add_argument(
'--check', dest='check', action='store_true',
help='Validate model instance before launching server')
parser.add_argument('--basic-auth-user',
default=os.environ.get('ML_SERVER_BASIC_AUTH_USER', None),
help='Basic auth user')

parser.add_argument('--basic-auth-pass',
default=os.environ.get('ML_SERVER_BASIC_AUTH_PASS', None),
help='Basic auth pass')

args = parser.parse_args()

# setup logging level
if args.log_level:
logging.root.setLevel(args.log_level)

def isfloat(value):
try:
float(value)
return True
except ValueError:
return False

def parse_kwargs():
param = dict()
for k, v in args.kwargs:
if v.isdigit():
param[k] = int(v)
elif v == 'True' or v == 'true':
param[k] = True
elif v == 'False' or v == 'false':
param[k] = False
elif isfloat(v):
param[k] = float(v)
else:
param[k] = v
return param

kwargs = get_kwargs_from_config()

if args.kwargs:
kwargs.update(parse_kwargs())

if args.check:
print('Check "' + NewModel.__name__ + '" instance creation..')
model = NewModel(**kwargs)

app = init_app(model_class=NewModel, basic_auth_user=args.basic_auth_user, basic_auth_pass=args.basic_auth_pass)

app.run(host=args.host, port=args.port, debug=args.debug)

else:
# for uWSGI use
app = init_app(model_class=NewModel)
41 changes: 41 additions & 0 deletions label_studio_ml/examples/segment_anything_2_video/docker-compose.yml
@@ -0,0 +1,41 @@
version: "3.8"

services:
ml-backend:
container_name: ml-backend
image: humansignal/ml-backend:v0
build:
context: .
args:
TEST_ENV: ${TEST_ENV}
environment:
# specify these parameters if you want to use basic auth for the model server
- BASIC_AUTH_USER=
- BASIC_AUTH_PASS=
# set the log level for the model server
- LOG_LEVEL=DEBUG
# any other parameters that you want to pass to the model server
- ANY=PARAMETER
# specify the number of workers and threads for the model server
- WORKERS=1
- THREADS=8
# specify the model directory (likely you don't need to change this)
- MODEL_DIR=/data/models
# specify device
- DEVICE=cuda # or 'cpu' (coming soon)
# SAM2 model config
- MODEL_CONFIG=sam2_hiera_l.yaml
# SAM2 checkpoint
- MODEL_CHECKPOINT=sam2_hiera_large.pt

# Specify the Label Studio URL and API key to access
# uploaded, local storage and cloud storage files.
# Do not use 'localhost' as it does not work within Docker containers.
# Use prefix 'http://' or 'https://' for the URL always.
# Determine the actual IP using 'ifconfig' (Linux/Mac) or 'ipconfig' (Windows).
- LABEL_STUDIO_URL=
- LABEL_STUDIO_API_KEY=
ports:
- "9090:9090"
volumes:
- "./data/server:/data"
