diff --git a/demo/realtime-img2img/requirements.txt b/demo/realtime-img2img/requirements.txt
index a379a58e..487d92a6 100644
--- a/demo/realtime-img2img/requirements.txt
+++ b/demo/realtime-img2img/requirements.txt
@@ -7,7 +7,6 @@ fastapi==0.115.0
 uvicorn[standard]==0.32.0
 Pillow==10.5.0
 compel==2.0.2
-controlnet-aux==0.0.7
 xformers; sys_platform != 'darwin' or platform_machine != 'arm64'
 markdown2
 PyYAML
diff --git a/setup.py b/setup.py
index 6338e387..7a687bcc 100644
--- a/setup.py
+++ b/setup.py
@@ -1,26 +1,77 @@
 import os
 import re
+import sys

 from setuptools import find_packages, setup

+# Copied from pip_utils.py to avoid import
+def _check_torch_installed():
+    try:
+        import torch
+        import torchvision
+    except Exception:
+        msg = (
+            "Missing required pre-installed packages: torch, torchvision\n"
+            "Install the PyTorch CUDA wheels from the appropriate index first, e.g.:\n"
+            "  pip install --index-url https://download.pytorch.org/whl/cu12x torch torchvision\n"
+            "Replace the index URL and versions to match your CUDA runtime."
+        )
+        raise RuntimeError(msg)
+
+    if not torch.version.cuda:
+        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+
+
+def get_cuda_constraint():
+    cuda_version = os.environ.get("STREAMDIFFUSION_CUDA_VERSION") or \
+        os.environ.get("CUDA_VERSION")
+
+    if not cuda_version:
+        try:
+            import torch
+            cuda_version = torch.version.cuda
+        except Exception:
+            # might not be available during wheel build, so we have to ignore
+            pass
+
+    if not cuda_version:
+        return ">=11,<13"
+
+    parts = cuda_version.split(".")
+    if len(parts) < 2:
+        raise RuntimeError(f"Invalid CUDA version: {cuda_version}")
+    return f"~={parts[0]}.{parts[1]}"
+
+
+if any(cmd in sys.argv for cmd in ("install", "develop")):
+    _check_torch_installed()

 _deps = [
+    f"cuda-python{get_cuda_constraint()}",
+    "xformers==0.0.30",
+    "diffusers==0.35.0",
+    "transformers==4.56.0",
+    "accelerate==1.10.0",
+    "huggingface_hub==0.35.0",
+    "Pillow==11.0.0",
+    "fire==0.6.0",
+    "omegaconf==2.3.0",
+    "onnx==1.18.0",
+    "onnxruntime==1.22.0",
+    "onnxruntime-gpu==1.22.0",
+    "protobuf==4.25.3",
+    "colored==2.2.4",
+    "pywin32==306;sys_platform == 'win32'",
+    "onnx-graphsurgeon==0.5.8",
+    "controlnet-aux==0.0.10",
+    "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
+    "mediapipe==0.10.21",
+    "insightface==0.7.3",
+    # We can't really pin torch version as it depends on CUDA, but we check if it's pre-installed above
     "torch",
-    "xformers",
-    "diffusers>=0.31.0",
-    "transformers",
-    "accelerate",
-    "fire",
-    "omegaconf",
-    "cuda-python==12.9.0",
-    "onnx>=1.15.0",
-    "onnxruntime>=1.16.3",
-    "protobuf>=3.20.2",
-    "colored",
-    "pywin32;sys_platform == 'win32'"
 ]

-deps = {b: a for a, b in (re.findall(r"^(([^!=<>~]+)(?:[!=<>~].*)?$)", x)[0] for x in _deps)}
+deps = {b: a for a, b in (re.findall(r"^(([^!=<>~ @]+)(?:[!=<>~ @].*)?$)", x)[0] for x in _deps)}


 def deps_list(*pkgs):
@@ -30,7 +81,9 @@ def deps_list(*pkgs):
 extras = {}
 extras["xformers"] = deps_list("xformers")
 extras["torch"] = deps_list("torch", "accelerate")
-extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "colored")
+extras["tensorrt"] = deps_list("protobuf", "cuda-python", "onnx", "onnxruntime", "onnxruntime-gpu", "colored")
+extras["controlnet"] = deps_list("onnx-graphsurgeon", "controlnet-aux")
+extras["ipadapter"] = deps_list("diffusers-ipadapter", "mediapipe", "insightface")
 extras["dev"] = extras["xformers"] + extras["torch"] + extras["tensorrt"]


@@ -40,9 +93,11 @@ def deps_list(*pkgs):
     deps["diffusers"],
     deps["transformers"],
     deps["accelerate"],
-    "diffusers-ipadapter @ git+https://github.com/livepeer/Diffusers_IPAdapter.git@405f87da42932e30bd55ee8dca3ce502d7834a99",
+    deps["huggingface_hub"],
+    deps["Pillow"],
 ]

+
 setup(
     name="streamdiffusion",
     version="0.1.1",
diff --git a/src/streamdiffusion/acceleration/tensorrt/utilities.py b/src/streamdiffusion/acceleration/tensorrt/utilities.py
index ce1124df..5d21d21c 100644
--- a/src/streamdiffusion/acceleration/tensorrt/utilities.py
+++ b/src/streamdiffusion/acceleration/tensorrt/utilities.py
@@ -270,6 +270,11 @@ def activate(self, reuse_device_memory=None):
         self.context = self.engine.create_execution_context()

     def allocate_buffers(self, shape_dict=None, device="cuda"):
+        # Ensure an execution context exists before allocating buffers
+        if self.context is None:
+            if self.engine is None:
+                raise RuntimeError("TensorRT engine is not loaded; call load() before allocate_buffers().")
+            self.activate()
         # Check if we can reuse existing buffers (OPTIMIZATION)
         if self._can_reuse_buffers(shape_dict, device):
             return
diff --git a/src/streamdiffusion/pip_utils.py b/src/streamdiffusion/pip_utils.py
index 25b024ad..fe0c5ebd 100644
--- a/src/streamdiffusion/pip_utils.py
+++ b/src/streamdiffusion/pip_utils.py
@@ -3,7 +3,7 @@
 import os
 import subprocess
 import sys
-from typing import Dict, Optional
+from typing import Dict, Literal, Optional

 from packaging.version import Version

@@ -12,6 +12,41 @@
 index_url = os.environ.get("INDEX_URL", "")


+def _check_torch_installed():
+    try:
+        import torch
+        import torchvision
+    except Exception:
+        msg = (
+            "Missing required pre-installed packages: torch, torchvision\n"
+            "Install the PyTorch CUDA wheels from the appropriate index first, e.g.:\n"
+            "  pip install --index-url https://download.pytorch.org/whl/cu12x torch torchvision\n"
+            "Replace the index URL and versions to match your CUDA runtime."
+        )
+        raise RuntimeError(msg)
+
+    if not torch.version.cuda:
+        raise RuntimeError("Detected CPU-only PyTorch. Install CUDA-enabled torch/vision/audio before installing this package.")
+
+
+def get_cuda_version() -> str:
+    _check_torch_installed()
+
+    import torch
+    return torch.version.cuda
+
+
+def get_cuda_major() -> Optional[Literal["11", "12"]]:
+    version = get_cuda_version()
+    if not version:
+        return None
+
+    major = version.split(".")[0]
+    if major not in ("11", "12"):
+        return None
+    return major
+
+
 def version(package: str) -> Optional[Version]:
     try:
         return Version(importlib.import_module(package).__version__)
diff --git a/src/streamdiffusion/tools/install-tensorrt.py b/src/streamdiffusion/tools/install-tensorrt.py
index 182871c4..0fbb8d2e 100644
--- a/src/streamdiffusion/tools/install-tensorrt.py
+++ b/src/streamdiffusion/tools/install-tensorrt.py
@@ -3,52 +3,43 @@
 import fire
 from packaging.version import Version

-from ..pip_utils import is_installed, run_pip, version
+from ..pip_utils import is_installed, run_pip, version, get_cuda_major
 import platform


-def get_cuda_version_from_torch() -> Optional[Literal["11", "12"]]:
-    try:
-        import torch
-    except ImportError:
-        return None
+def install(cu: Optional[Literal["11", "12"]] = get_cuda_major()):
+    if cu not in ("11", "12"):
+        raise RuntimeError("CUDA major version not detected. Pass --cu 11 or --cu 12 explicitly.")

-    return torch.version.cuda.split(".")[0]
-
-
-def install(cu: Optional[Literal["11", "12"]] = get_cuda_version_from_torch()):
-    if cu is None or cu not in ["11", "12"]:
-        print("Could not detect CUDA version. Please specify manually.")
-        return
     print("Installing TensorRT requirements...")

-    if is_installed("tensorrt"):
-        if version("tensorrt") < Version("9.0.0"):
-            run_pip("uninstall -y tensorrt")
-
-    cudnn_name = f"nvidia-cudnn-cu{cu}==8.9.4.25"
+    min_trt_version = Version("10.12.0") if cu == "12" else Version("9.0.0")
+    trt_version = version("tensorrt")
+    if trt_version and trt_version < min_trt_version:
+        run_pip("uninstall -y tensorrt")

-    if not is_installed("tensorrt"):
-        run_pip(f"install {cudnn_name} --no-cache-dir")
-        run_pip(
-            "install --pre --extra-index-url https://pypi.nvidia.com tensorrt==9.0.1.post11.dev4 --no-cache-dir"
-        )
+    cudnn_package, trt_package = (
+        ("nvidia-cudnn-cu12==9.7.1.26", "tensorrt==10.12.0.36")
+        if cu == "12" else
+        ("nvidia-cudnn-cu11==8.9.7.29", "tensorrt==9.0.1.post11.dev4")
+    )
+    if not is_installed(trt_package):
+        run_pip(f"install {cudnn_package} --no-cache-dir")
+        run_pip(f"install --extra-index-url https://pypi.nvidia.com {trt_package} --no-cache-dir")

     if not is_installed("polygraphy"):
         run_pip(
-            "install polygraphy==0.47.1 --extra-index-url https://pypi.ngc.nvidia.com"
+            "install polygraphy==0.49.24 --extra-index-url https://pypi.ngc.nvidia.com"
         )
     if not is_installed("onnx_graphsurgeon"):
         run_pip(
-            "install onnx-graphsurgeon==0.3.26 --extra-index-url https://pypi.ngc.nvidia.com"
+            "install onnx-graphsurgeon==0.5.8 --extra-index-url https://pypi.ngc.nvidia.com"
         )
     if platform.system() == 'Windows' and not is_installed("pywin32"):
         run_pip(
-            "install pywin32"
+            "install pywin32==306"
         )

-    pass
-

 if __name__ == "__main__":
     fire.Fire(install)
diff --git a/src/streamdiffusion/utils/__init__.py b/src/streamdiffusion/utils/__init__.py
new file mode 100644
index 00000000..e69de29b
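Note: with setup.py now requiring a CUDA-enabled torch/torchvision before installation and splitting the optional dependencies into the tensorrt, controlnet, and ipadapter extras, a typical install would look roughly like the sketch below. This is illustrative only: the cu121 index is just an example (pick the wheel index matching your CUDA runtime, as the message in _check_torch_installed says), and the TensorRT/cuDNN wheels are still installed by the install-tensorrt tool, which now auto-detects the CUDA major version via get_cuda_major().

    # Install CUDA-enabled torch/torchvision first (checked by setup.py before install/develop)
    pip install --index-url https://download.pytorch.org/whl/cu121 torch torchvision
    # Editable install with the optional extras defined in setup.py
    pip install -e ".[tensorrt,controlnet,ipadapter]"
    # Install TensorRT, cuDNN, polygraphy, etc.; the CUDA major version is detected from torch
    python -m streamdiffusion.tools.install-tensorrt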