From b6b86121ea528f39ba19216778a39c6dbb992647 Mon Sep 17 00:00:00 2001 From: Sathya Kamesh Date: Thu, 4 Jul 2024 11:35:31 +0200 Subject: [PATCH 1/7] adding data checks function --- tabpfn_client/client.py | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/tabpfn_client/client.py b/tabpfn_client/client.py index bb8a9eb..d31533d 100644 --- a/tabpfn_client/client.py +++ b/tabpfn_client/client.py @@ -10,6 +10,7 @@ import json from typing import Literal +from sklearn.utils import check_consistent_length, check_array from tabpfn_client.tabpfn_common_utils import utils as common_utils @@ -67,6 +68,34 @@ def reset_authorization(self): @property def is_initialized(self): return self.access_token is not None and self.access_token != "" + + def check_training_data(self, X, y): + """ + Check the integrity of the training data. + + Parameters + ---------- + X : array-like of shape (n_samples, n_features) + The training input samples. + y : array-like of shape (n_samples,) or (n_samples, n_outputs) + The target values. + + Returns + ------- + is_valid : bool + True if the data is valid. + message : str + The message returned from the server. + """ + + X = check_array( + X, accept_sparse="csr", dtype=np.float32, force_all_finite=False + ) + y = check_array(y, ensure_2d=False, dtype=np.float32, force_all_finite=False) + + check_consistent_length(X, y) + + return X, y def upload_train_set(self, X, y) -> str: """ @@ -85,6 +114,10 @@ def upload_train_set(self, X, y) -> str: The unique ID of the train set in the server. """ + + #checking the integrity of the data + X, y = self.check_training_data(X, y) + X = common_utils.serialize_to_csv_formatted_bytes(X) y = common_utils.serialize_to_csv_formatted_bytes(y) From 55f3a67be8cbbd94574d59cb336513f098895899 Mon Sep 17 00:00:00 2001 From: Sathya Kamesh Date: Thu, 4 Jul 2024 14:55:13 +0200 Subject: [PATCH 2/7] adding unit tests --- tabpfn_client/client.py | 4 ++-- tabpfn_client/tests/unit/test_client.py | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/tabpfn_client/client.py b/tabpfn_client/client.py index d31533d..68ca445 100644 --- a/tabpfn_client/client.py +++ b/tabpfn_client/client.py @@ -69,7 +69,8 @@ def reset_authorization(self): def is_initialized(self): return self.access_token is not None and self.access_token != "" - def check_training_data(self, X, y): + @staticmethod + def check_training_data(X, y): """ Check the integrity of the training data. 
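For illustration only (not part of the patch series): a minimal sketch of how the check_training_data helper introduced above is expected to behave once PATCH 2/7 makes it a staticmethod. The import path matches the unit tests; the array shapes are hypothetical.

import numpy as np
from tabpfn_client.client import ServiceClient

# Hypothetical training data: 50 samples, 4 features.
X = np.random.rand(50, 4)
y = np.random.randint(0, 2, 50)

# Valid input: both arrays are returned cast to float32 by sklearn's check_array.
X_checked, y_checked = ServiceClient.check_training_data(X, y)
assert X_checked.dtype == np.float32 and y_checked.dtype == np.float32

# Mismatched sample counts are rejected by check_consistent_length.
try:
    ServiceClient.check_training_data(X, y[:-1])
except ValueError as err:
    print(err)  # Found input variables with inconsistent numbers of samples: [50, 49]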
@@ -94,7 +95,6 @@ def check_training_data(self, X, y): y = check_array(y, ensure_2d=False, dtype=np.float32, force_all_finite=False) check_consistent_length(X, y) - return X, y def upload_train_set(self, X, y) -> str: diff --git a/tabpfn_client/tests/unit/test_client.py b/tabpfn_client/tests/unit/test_client.py index 294e51a..11bcb9b 100644 --- a/tabpfn_client/tests/unit/test_client.py +++ b/tabpfn_client/tests/unit/test_client.py @@ -221,3 +221,12 @@ def test_validate_response_only_version_check(self): response.json.return_value = {"detail": "Some other error"} r = self.client._validate_response(response, "test", only_version_check=True) self.assertIsNone(r) + + def test_input_data_check(self): + X, y = load_breast_cancer(return_X_y=True) + + # Test for valid input + ServiceClient.check_training_data(X[:99], y[:99]) + with self.assertRaises(ValueError) as cm: + ServiceClient.check_training_data(X[:99], y[:98]) + self.assertEqual(str(cm.exception), "Found input variables with inconsistent numbers of samples: [99, 98]") From ef589781b20c7f76cf9bfb38319ca775adf64a71 Mon Sep 17 00:00:00 2001 From: Sathya Kamesh Date: Thu, 18 Jul 2024 15:15:50 +0200 Subject: [PATCH 3/7] adding checks for size of input --- tabpfn_client/client.py | 4 ++++ tabpfn_client/tests/unit/test_client.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/tabpfn_client/client.py b/tabpfn_client/client.py index 68ca445..9c26a72 100644 --- a/tabpfn_client/client.py +++ b/tabpfn_client/client.py @@ -95,6 +95,10 @@ def check_training_data(X, y): y = check_array(y, ensure_2d=False, dtype=np.float32, force_all_finite=False) check_consistent_length(X, y) + # length and feature assertions + assert X.shape[0] <= 10000, "The number of samples should not be more than 10000." + assert X.shape[1] <= 500, "The number of features should not be more than 500." 
+ return X, y def upload_train_set(self, X, y) -> str: diff --git a/tabpfn_client/tests/unit/test_client.py b/tabpfn_client/tests/unit/test_client.py index 11bcb9b..7ad7317 100644 --- a/tabpfn_client/tests/unit/test_client.py +++ b/tabpfn_client/tests/unit/test_client.py @@ -230,3 +230,15 @@ def test_input_data_check(self): with self.assertRaises(ValueError) as cm: ServiceClient.check_training_data(X[:99], y[:98]) self.assertEqual(str(cm.exception), "Found input variables with inconsistent numbers of samples: [99, 98]") + + # Test for oversized data + X = np.random.randn(10001,501) + y = np.random.randint(0,2,10001) + + with self.assertRaises(AssertionError) as cm: + ServiceClient.check_training_data(X[:10000], y[:10000]) + self.assertEqual(str(cm.exception), "The number of samples should not be more than 10000.") + + with self.assertRaises(AssertionError) as cm: + ServiceClient.check_training_data(X[:, :500], y) + self.assertEqual(str(cm.exception), "The number of features should not be more than 500.") From 8b211a6c895096858c5d7bdd401257fbad81fede Mon Sep 17 00:00:00 2001 From: Sathya Kamesh Date: Thu, 18 Jul 2024 15:22:13 +0200 Subject: [PATCH 4/7] fixing tests --- tabpfn_client/tests/unit/test_client.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tabpfn_client/tests/unit/test_client.py b/tabpfn_client/tests/unit/test_client.py index 7ad7317..3ee7948 100644 --- a/tabpfn_client/tests/unit/test_client.py +++ b/tabpfn_client/tests/unit/test_client.py @@ -237,8 +237,8 @@ def test_input_data_check(self): with self.assertRaises(AssertionError) as cm: ServiceClient.check_training_data(X[:10000], y[:10000]) - self.assertEqual(str(cm.exception), "The number of samples should not be more than 10000.") + self.assertEqual(str(cm.exception), "The number of features should not be more than 500.") with self.assertRaises(AssertionError) as cm: ServiceClient.check_training_data(X[:, :500], y) - self.assertEqual(str(cm.exception), "The number of features should not be more than 500.") + self.assertEqual(str(cm.exception), "The number of samples should not be more than 10000.") From c93a709d6b6ae9db2b40156cdb73809902889bd8 Mon Sep 17 00:00:00 2001 From: Sathya Kamesh Date: Fri, 23 Aug 2024 15:40:27 +0200 Subject: [PATCH 5/7] reformat commit --- tabpfn_client/client.py | 8 ++++---- tabpfn_client/tests/unit/test_client.py | 17 ++++++++++++----- 2 files changed, 16 insertions(+), 9 deletions(-) diff --git a/tabpfn_client/client.py b/tabpfn_client/client.py index 9c26a72..f094bae 100644 --- a/tabpfn_client/client.py +++ b/tabpfn_client/client.py @@ -68,7 +68,7 @@ def reset_authorization(self): @property def is_initialized(self): return self.access_token is not None and self.access_token != "" - + @staticmethod def check_training_data(X, y): """ @@ -96,8 +96,8 @@ def check_training_data(X, y): check_consistent_length(X, y) # length and feature assertions - assert X.shape[0] <= 10000, "The number of samples should not be more than 10000." - assert X.shape[1] <= 500, "The number of features should not be more than 500." + assert X.shape[0] <= 10000, "The number of samples cannot be more than 10000." + assert X.shape[1] <= 500, "The number of features cannot be more than 500." 
return X, y @@ -119,7 +119,7 @@ def upload_train_set(self, X, y) -> str: """ - #checking the integrity of the data + # checking the integrity of the data X, y = self.check_training_data(X, y) X = common_utils.serialize_to_csv_formatted_bytes(X) diff --git a/tabpfn_client/tests/unit/test_client.py b/tabpfn_client/tests/unit/test_client.py index 3ee7948..d9fec7c 100644 --- a/tabpfn_client/tests/unit/test_client.py +++ b/tabpfn_client/tests/unit/test_client.py @@ -229,16 +229,23 @@ def test_input_data_check(self): ServiceClient.check_training_data(X[:99], y[:99]) with self.assertRaises(ValueError) as cm: ServiceClient.check_training_data(X[:99], y[:98]) - self.assertEqual(str(cm.exception), "Found input variables with inconsistent numbers of samples: [99, 98]") + self.assertEqual( + str(cm.exception), + "Found input variables with inconsistent numbers of samples: [99, 98]", + ) # Test for oversized data - X = np.random.randn(10001,501) - y = np.random.randint(0,2,10001) + X = np.random.randn(10001, 501) + y = np.random.randint(0, 2, 10001) with self.assertRaises(AssertionError) as cm: ServiceClient.check_training_data(X[:10000], y[:10000]) - self.assertEqual(str(cm.exception), "The number of features should not be more than 500.") + self.assertEqual( + str(cm.exception), "The number of features cannot be more than 500." + ) with self.assertRaises(AssertionError) as cm: ServiceClient.check_training_data(X[:, :500], y) - self.assertEqual(str(cm.exception), "The number of samples should not be more than 10000.") + self.assertEqual( + str(cm.exception), "The number of samples cannot be more than 10000." + ) From 6de4513c17a6cb1d006b157d57115843332f993b Mon Sep 17 00:00:00 2001 From: "Liam, SB Hoo" Date: Sat, 21 Sep 2024 18:57:46 +0200 Subject: [PATCH 6/7] Move data size check to estimator, check on train and predict, add test --- tabpfn_client/client.py | 36 ----------- tabpfn_client/estimator.py | 36 +++++++++++ tabpfn_client/tests/unit/test_client.py | 28 -------- .../tests/unit/test_tabpfn_classifier.py | 64 ++++++++++++++++++- .../tests/unit/test_tabpfn_regressor.py | 64 ++++++++++++++++++- 5 files changed, 162 insertions(+), 66 deletions(-) diff --git a/tabpfn_client/client.py b/tabpfn_client/client.py index 781948b..4ed4339 100644 --- a/tabpfn_client/client.py +++ b/tabpfn_client/client.py @@ -11,7 +11,6 @@ import json from typing import Literal -from sklearn.utils import check_consistent_length, check_array from tabpfn_client.tabpfn_common_utils import utils as common_utils @@ -91,38 +90,6 @@ def reset_authorization(self): def is_initialized(self): return self.access_token is not None and self.access_token != "" - @staticmethod - def check_training_data(X, y): - """ - Check the integrity of the training data. - - Parameters - ---------- - X : array-like of shape (n_samples, n_features) - The training input samples. - y : array-like of shape (n_samples,) or (n_samples, n_outputs) - The target values. - - Returns - ------- - is_valid : bool - True if the data is valid. - message : str - The message returned from the server. - """ - - X = check_array( - X, accept_sparse="csr", dtype=np.float32, force_all_finite=False - ) - y = check_array(y, ensure_2d=False, dtype=np.float32, force_all_finite=False) - - check_consistent_length(X, y) - # length and feature assertions - assert X.shape[0] <= 10000, "The number of samples cannot be more than 10000." - assert X.shape[1] <= 500, "The number of features cannot be more than 500." 
- - return X, y - def upload_train_set(self, X, y) -> str: """ Upload a train set to server and return the train set UID if successful. @@ -141,9 +108,6 @@ def upload_train_set(self, X, y) -> str: """ - # checking the integrity of the data - X, y = self.check_training_data(X, y) - X = common_utils.serialize_to_csv_formatted_bytes(X) y = common_utils.serialize_to_csv_formatted_bytes(y) diff --git a/tabpfn_client/estimator.py b/tabpfn_client/estimator.py index 290600c..8238694 100644 --- a/tabpfn_client/estimator.py +++ b/tabpfn_client/estimator.py @@ -6,11 +6,15 @@ from tabpfn_client import init from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin from sklearn.utils.validation import check_is_fitted +from sklearn.utils import check_consistent_length from tabpfn_client import config logger = logging.getLogger(__name__) +MAX_ROWS = 10000 +MAX_COLS = 500 + @dataclass(eq=True, frozen=True) class PreprocessorConfig: @@ -194,10 +198,16 @@ def _validate_targets_and_classes(self, y) -> np.ndarray: not_nan_mask = ~np.isnan(y) self.classes_ = np.unique(y_[not_nan_mask]) + @staticmethod + def _validate_data_size(X: np.ndarray, y: np.ndarray | None): + if X.shape[0] != y.shape[0]: + raise ValueError("X and y must have the same number of samples") + def fit(self, X, y): # assert init() is called init() + validate_data_size(X, y) self._validate_targets_and_classes(y) if config.g_tabpfn_config.use_server: @@ -207,6 +217,7 @@ def fit(self, X, y): ), "Only 'latest_tabpfn_hosted' model is supported at the moment for init(use_server=True)" except AssertionError as e: print(e) + self.last_train_set_uid = config.g_tabpfn_config.inference_handler.fit(X, y) self.fitted_ = True else: @@ -223,6 +234,8 @@ def predict(self, X): def predict_proba(self, X): check_is_fitted(self) + validate_data_size(X) + return config.g_tabpfn_config.inference_handler.predict( X, task="classification", @@ -344,6 +357,8 @@ def fit(self, X, y): # assert init() is called init() + validate_data_size(X, y) + if config.g_tabpfn_config.use_server: self.last_train_set_uid = config.g_tabpfn_config.inference_handler.fit(X, y) self.fitted_ = True @@ -366,6 +381,7 @@ def predict(self, X): def predict_full(self, X): check_is_fitted(self) + validate_data_size(X) estimator_param = self.get_params() if "model" in estimator_param: @@ -393,3 +409,23 @@ def _model_name_to_path(self, model_name: str) -> str: return f"{base_path}_{model_name}.ckpt" else: raise ValueError(f"Invalid model name: {model_name}") + + +def validate_data_size(X: np.ndarray, y: np.ndarray | None = None): + """ + Check the integrity of the training data. 
+ - check if the number of rows between X and y is consistent + if y is not None (ValueError) + - check if the number of rows is less than MAX_ROWS (ValueError) + - check if the number of columns is less than MAX_COLS (ValueError) + """ + + # check if the number of samples is consistent (ValueError) + if y is not None: + check_consistent_length(X, y) + + # length and feature assertions + if X.shape[0] > MAX_ROWS: + raise ValueError(f"The number of rows cannot be more than {MAX_ROWS}.") + if X.shape[1] > MAX_COLS: + raise ValueError(f"The number of columns cannot be more than {MAX_COLS}.") diff --git a/tabpfn_client/tests/unit/test_client.py b/tabpfn_client/tests/unit/test_client.py index ded7045..adb1740 100644 --- a/tabpfn_client/tests/unit/test_client.py +++ b/tabpfn_client/tests/unit/test_client.py @@ -231,31 +231,3 @@ def test_validate_response_only_version_check(self): response.json.return_value = {"detail": "Some other error"} r = self.client._validate_response(response, "test", only_version_check=True) self.assertIsNone(r) - - def test_input_data_check(self): - X, y = load_breast_cancer(return_X_y=True) - - # Test for valid input - ServiceClient.check_training_data(X[:99], y[:99]) - with self.assertRaises(ValueError) as cm: - ServiceClient.check_training_data(X[:99], y[:98]) - self.assertEqual( - str(cm.exception), - "Found input variables with inconsistent numbers of samples: [99, 98]", - ) - - # Test for oversized data - X = np.random.randn(10001, 501) - y = np.random.randint(0, 2, 10001) - - with self.assertRaises(AssertionError) as cm: - ServiceClient.check_training_data(X[:10000], y[:10000]) - self.assertEqual( - str(cm.exception), "The number of features cannot be more than 500." - ) - - with self.assertRaises(AssertionError) as cm: - ServiceClient.check_training_data(X[:, :500], y) - self.assertEqual( - str(cm.exception), "The number of samples cannot be more than 10000." 
- ) diff --git a/tabpfn_client/tests/unit/test_tabpfn_classifier.py b/tabpfn_client/tests/unit/test_tabpfn_classifier.py index 863d14b..c3419cc 100644 --- a/tabpfn_client/tests/unit/test_tabpfn_classifier.py +++ b/tabpfn_client/tests/unit/test_tabpfn_classifier.py @@ -1,5 +1,5 @@ import unittest -from unittest.mock import patch +from unittest.mock import patch, MagicMock import shutil import numpy as np @@ -14,6 +14,7 @@ from tabpfn_client.client import ServiceClient from tabpfn_client.tests.mock_tabpfn_server import with_mock_server from tabpfn_client.constants import CACHE_DIR +from tabpfn_client import config class TestTabPFNClassifierInit(unittest.TestCase): @@ -160,3 +161,64 @@ def test_decline_terms_and_cond(self, mock_server, mock_prompt_for_terms_and_con self.assertRaises(RuntimeError, init, use_server=True) self.assertTrue(mock_prompt_for_terms_and_cond.called) + + +class TestTabPFNClassifierInference(unittest.TestCase): + def setUp(self): + # skip init + config.g_tabpfn_config.is_initialized = True + + def tearDown(self): + # undo setUp + config.reset() + + def test_data_size_check_on_train_with_inconsistent_number_of_samples_raise_error( + self, + ): + X = np.random.rand(10, 5) + y = np.random.randint(0, 2, 11) + tabpfn = TabPFNClassifier() + + with self.assertRaises(ValueError): + tabpfn.fit(X, y) + + def test_data_size_check_on_train_with_oversized_data_raise_error(self): + X = np.random.randn(10001, 501) + y = np.random.randint(0, 2, 10001) + + tabpfn = TabPFNClassifier() + + # test oversized columns + with self.assertRaises(ValueError): + tabpfn.fit(X[:10], y[:10]) + + # test oversized rows + with self.assertRaises(ValueError): + tabpfn.fit(X[:, :10], y) + + def test_data_size_check_on_predict_with_oversized_data_raise_error(self): + test_X = np.random.randn(10001, 5) + tabpfn = TabPFNClassifier() + + # skip fitting + tabpfn.fitted_ = True + + # test oversized rows + with self.assertRaises(ValueError): + tabpfn.predict(test_X) + + def test_data_check_on_predict_with_valid_data_pass(self): + test_X = np.random.randn(10, 5) + tabpfn = TabPFNClassifier() + + # skip fitting + tabpfn.fitted_ = True + tabpfn.classes_ = np.array([0, 1]) + + # mock prediction + config.g_tabpfn_config.inference_handler = MagicMock() + config.g_tabpfn_config.inference_handler.predict = MagicMock( + return_value={"probas": np.random.rand(10, 2)} + ) + + tabpfn.predict(test_X) diff --git a/tabpfn_client/tests/unit/test_tabpfn_regressor.py b/tabpfn_client/tests/unit/test_tabpfn_regressor.py index 2b5215e..db065a5 100644 --- a/tabpfn_client/tests/unit/test_tabpfn_regressor.py +++ b/tabpfn_client/tests/unit/test_tabpfn_regressor.py @@ -1,5 +1,6 @@ import unittest -from unittest.mock import patch +from unittest.mock import patch, MagicMock + import shutil import numpy as np from sklearn.datasets import load_diabetes @@ -13,6 +14,7 @@ from tabpfn_client.client import ServiceClient from tabpfn_client.tests.mock_tabpfn_server import with_mock_server from tabpfn_client.constants import CACHE_DIR +from tabpfn_client import config class TestTabPFNRegressorInit(unittest.TestCase): @@ -175,3 +177,63 @@ def test_decline_terms_and_cond(self, mock_server, mock_prompt_for_terms_and_con self.assertRaises(RuntimeError, init, use_server=True) self.assertTrue(mock_prompt_for_terms_and_cond.called) + + +class TestTabPFNRegressorInference(unittest.TestCase): + def setUp(self): + # skip init + config.g_tabpfn_config.is_initialized = True + + def tearDown(self): + # undo setUp + config.reset() + + def 
test_data_size_check_on_train_with_inconsistent_number_of_samples_raise_error( + self, + ): + X = np.random.rand(10, 5) + y = np.random.rand(11) + tabpfn = TabPFNRegressor() + + with self.assertRaises(ValueError): + tabpfn.fit(X, y) + + def test_data_size_check_on_train_with_oversized_data_raise_error(self): + X = np.random.randn(10001, 501) + y = np.random.randn(10001) + + tabpfn = TabPFNRegressor() + + # test oversized columns + with self.assertRaises(ValueError): + tabpfn.fit(X[:10], y[:10]) + + # test oversized rows + with self.assertRaises(ValueError): + tabpfn.fit(X[:, :10], y) + + def test_data_size_check_on_predict_with_oversized_data_raise_error(self): + test_X = np.random.randn(10001, 5) + tabpfn = TabPFNRegressor() + + # skip fitting + tabpfn.fitted_ = True + + # test oversized rows + with self.assertRaises(ValueError): + tabpfn.predict(test_X) + + def test_data_check_on_predict_with_valid_data_pass(self): + test_X = np.random.randn(10, 5) + tabpfn = TabPFNRegressor() + + # skip fitting + tabpfn.fitted_ = True + + # mock prediction + config.g_tabpfn_config.inference_handler = MagicMock() + config.g_tabpfn_config.inference_handler.predict = MagicMock( + return_value={"mean": np.random.randn(10)} + ) + + tabpfn.predict(test_X) From 14fa565334c1d5d5f4509b94a434c6b8a91b2199 Mon Sep 17 00:00:00 2001 From: "Liam, SB Hoo" Date: Sun, 22 Sep 2024 10:54:37 +0200 Subject: [PATCH 7/7] Minor change --- tabpfn_client/estimator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tabpfn_client/estimator.py b/tabpfn_client/estimator.py index 8238694..b630dba 100644 --- a/tabpfn_client/estimator.py +++ b/tabpfn_client/estimator.py @@ -6,7 +6,6 @@ from tabpfn_client import init from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin from sklearn.utils.validation import check_is_fitted -from sklearn.utils import check_consistent_length from tabpfn_client import config @@ -422,7 +421,8 @@ def validate_data_size(X: np.ndarray, y: np.ndarray | None = None): # check if the number of samples is consistent (ValueError) if y is not None: - check_consistent_length(X, y) + if X.shape[0] != y.shape[0]: + raise ValueError("X and y must have the same number of samples") # length and feature assertions if X.shape[0] > MAX_ROWS:
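For illustration only (not part of the patch series): a minimal sketch of how the module-level validate_data_size helper behaves once PATCH 6/7 and PATCH 7/7 are applied, assuming MAX_ROWS = 10000 and MAX_COLS = 500 as defined in estimator.py above. The array shapes below are hypothetical.

import numpy as np
from tabpfn_client.estimator import validate_data_size

X_ok = np.random.randn(100, 20)
y_ok = np.random.randint(0, 2, 100)
validate_data_size(X_ok, y_ok)  # passes silently

# Inconsistent sample counts now raise a plain ValueError (PATCH 7/7).
try:
    validate_data_size(X_ok, y_ok[:-1])
except ValueError as err:
    print(err)  # X and y must have the same number of samples

# Oversized inputs are rejected in the estimator, before anything is sent to the server.
try:
    validate_data_size(np.random.randn(10001, 20), np.random.randint(0, 2, 10001))
except ValueError as err:
    print(err)  # The number of rows cannot be more than 10000.

try:
    validate_data_size(np.random.randn(100, 501))
except ValueError as err:
    print(err)  # The number of columns cannot be more than 500.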