Skip to content

Commit 523c051

Browse files
committed
only init onnx session once
1 parent 720dc16 commit 523c051

File tree

4 files changed

+43
-31
lines changed

4 files changed

+43
-31
lines changed

src/tabpfn/base.py

+3-1
Original file line numberDiff line numberDiff line change
@@ -114,11 +114,13 @@ def initialize_tabpfn_model(
114114

115115
def load_onnx_model(
116116
model_path: str | Path,
117+
device: torch.device,
117118
) -> ONNXModelWrapper:
118119
"""Load a TabPFN model in ONNX format.
119120
120121
Args:
121122
model_path: Path to the ONNX model file.
123+
device: The device to run the model on.
122124
123125
Returns:
124126
The loaded ONNX model wrapped in a PyTorch-compatible interface.
@@ -139,7 +141,7 @@ def load_onnx_model(
139141
if not model_path.exists():
140142
raise FileNotFoundError(f"ONNX model not found at: {model_path}")
141143

142-
return ONNXModelWrapper(str(model_path))
144+
return ONNXModelWrapper(str(model_path), device)
143145

144146

145147
def determine_precision(

src/tabpfn/classifier.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -389,9 +389,15 @@ def fit(self, X: XType, y: YType) -> Self:
389389
"""
390390
static_seed, rng = infer_random_state(self.random_state)
391391

392+
# Determine device and precision
393+
self.device_ = infer_device_and_type(self.device)
394+
(self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
395+
determine_precision(self.inference_precision, self.device_)
396+
)
397+
392398
# Load the model and config
393399
if self.use_onnx:
394-
self.model_ = load_onnx_model("model_classifier.onnx")
400+
self.model_ = load_onnx_model("model_classifier.onnx", self.device_)
395401
else:
396402
self.model_, self.config_, _ = initialize_tabpfn_model(
397403
model_path=self.model_path,
@@ -400,12 +406,6 @@ def fit(self, X: XType, y: YType) -> Self:
400406
static_seed=static_seed,
401407
)
402408

403-
# Determine device and precision
404-
self.device_ = infer_device_and_type(self.device)
405-
(self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
406-
determine_precision(self.inference_precision, self.device_)
407-
)
408-
409409
# Build the interface_config
410410
self.interface_config_ = ModelInterfaceConfig.from_user_input(
411411
inference_config=self.inference_config,

src/tabpfn/misc/onnx_wrapper.py

+26-16
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,21 @@
2121
class ONNXModelWrapper:
2222
"""Wrap ONNX model to match the PyTorch model interface."""
2323

24-
def __init__(self, model_path: str):
24+
def __init__(self, model_path: str, device: torch.device):
2525
"""Initialize the ONNX model wrapper.
2626
2727
Args:
2828
model_path: Path to the ONNX model file.
29+
device: The device to run the model on.
2930
"""
3031
self.model_path = model_path
31-
self.providers = ["CPUExecutionProvider"]
32+
self.device = device
33+
if device.type == "cuda":
34+
self.providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
35+
elif device.type == "cpu":
36+
self.providers = ["CPUExecutionProvider"]
37+
else:
38+
raise ValueError(f"Invalid device: {device}")
3239
self.session = ort.InferenceSession(
3340
model_path,
3441
providers=self.providers,
@@ -46,24 +53,27 @@ def to(
4653
Returns:
4754
self
4855
"""
49-
if device.type == "cuda":
50-
# Check if CUDA is available in ONNX Runtime
51-
cuda_provider = "CUDAExecutionProvider"
52-
if cuda_provider in ort.get_available_providers():
53-
self.providers = [cuda_provider, "CPUExecutionProvider"]
54-
# Reinitialize session with CUDA provider
56+
# Only recreate session if device type has changed
57+
if device.type != self.device.type:
58+
if device.type == "cuda":
59+
# Check if CUDA is available in ONNX Runtime
60+
cuda_provider = "CUDAExecutionProvider"
61+
if cuda_provider in ort.get_available_providers():
62+
self.providers = [cuda_provider, "CPUExecutionProvider"]
63+
# Reinitialize session with CUDA provider
64+
self.session = ort.InferenceSession(
65+
self.model_path,
66+
providers=self.providers,
67+
)
68+
# If CUDA is not available, keep current session
69+
else:
70+
self.providers = ["CPUExecutionProvider"]
5571
self.session = ort.InferenceSession(
5672
self.model_path,
5773
providers=self.providers,
5874
)
59-
else:
60-
pass
61-
else:
62-
self.providers = ["CPUExecutionProvider"]
63-
self.session = ort.InferenceSession(
64-
self.model_path,
65-
providers=self.providers,
66-
)
75+
# Update the device
76+
self.device = device
6777
return self
6878

6979
def type(

src/tabpfn/regressor.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -401,9 +401,15 @@ def fit(self, X: XType, y: YType) -> Self:
401401
"""
402402
static_seed, rng = infer_random_state(self.random_state)
403403

404+
# Determine device and precision
405+
self.device_ = infer_device_and_type(self.device)
406+
(self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
407+
determine_precision(self.inference_precision, self.device_)
408+
)
409+
404410
# Load the model and config
405411
if self.use_onnx:
406-
self.model_ = load_onnx_model("model_regressor.onnx")
412+
self.model_ = load_onnx_model("model_regressor.onnx", self.device_)
407413
# Initialize bardist_ for ONNX mode
408414
# TODO: faster way to do this
409415
_, self.config_, self.bardist_ = initialize_tabpfn_model(
@@ -420,12 +426,6 @@ def fit(self, X: XType, y: YType) -> Self:
420426
static_seed=static_seed,
421427
)
422428

423-
# Determine device and precision
424-
self.device_ = infer_device_and_type(self.device)
425-
(self.use_autocast_, self.forced_inference_dtype_, byte_size) = (
426-
determine_precision(self.inference_precision, self.device_)
427-
)
428-
429429
# Build the interface_config
430430
self.interface_config_ = ModelInterfaceConfig.from_user_input(
431431
inference_config=self.inference_config,

0 commit comments

Comments (0)