From c0c1131d66c13a5a9091aa4502f647e4820d3c03 Mon Sep 17 00:00:00 2001
From: umar <46414488+mail4umar@users.noreply.github.com>
Date: Fri, 27 Oct 2023 12:11:49 -0500
Subject: [PATCH 1/4] Update cluster.py

---
 verticapy/machine_learning/vertica/cluster.py | 453 ++++++++++++++++++
 1 file changed, 453 insertions(+)

diff --git a/verticapy/machine_learning/vertica/cluster.py b/verticapy/machine_learning/vertica/cluster.py
index 3192c292b..482873d5c 100755
--- a/verticapy/machine_learning/vertica/cluster.py
+++ b/verticapy/machine_learning/vertica/cluster.py
@@ -2151,6 +2151,459 @@ class NearestCentroid(MulticlassClassifier):
     p: int, optional
         The p corresponding to the one of the p-distances
         (distance metric used to compute the model).
+
+    Examples
+    ---------
+
+    The following examples provide a basic understanding of usage.
+    For more detailed examples, please refer to the
+    :ref:`user_guide.machine_learning` or the
+    `Examples <https://www.vertica.com/python/examples/>`_
+    section on the website.
+
+    Load data for machine learning
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    We import ``verticapy``:
+
+    .. code-block:: python
+
+        import verticapy as vp
+
+    .. hint::
+
+        By assigning an alias to ``verticapy``, we mitigate the risk of code
+        collisions with other libraries. This precaution is necessary
+        because verticapy uses commonly known function names like "average"
+        and "median", which can potentially lead to naming conflicts.
+        The use of an alias ensures that the functions from verticapy are
+        used as intended without interfering with functions from other
+        libraries.
+
+    For this example, we will use the iris dataset.
+
+    .. code-block:: python
+
+        import verticapy.datasets as vpd
+
+        data = vpd.load_iris()
+
+    .. raw:: html
+        :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_iris.html
+
+    .. note::
+
+        VerticaPy offers a wide range of sample datasets that are
+        ideal for training and testing purposes. You can explore
+        the full list of available datasets in the :ref:`api.datasets`,
+        which provides detailed information on each dataset
+        and how to use them effectively. These datasets are invaluable
+        resources for honing your data analysis and machine learning
+        skills within the VerticaPy environment.
+
+    You can easily divide your dataset into training and testing subsets
+    using the :py:mod:`vDataFrame.train_test_split` method. This is a
+    crucial step when preparing your data for machine learning, as it
+    allows you to evaluate the performance of your models accurately.
+
+    .. code-block:: python
+
+        data = vpd.load_iris()
+        train, test = data.train_test_split(test_size = 0.2)
+
+    .. warning::
+
+        In this case, VerticaPy utilizes seeded randomization to guarantee
+        the reproducibility of your data split. However, please be aware
+        that this approach may lead to reduced performance. For a more
+        efficient data split, you can use the :py:mod:`vDataFrame.to_db`
+        method to save your results into ``tables`` or ``temporary tables``.
+        This will help enhance the overall performance of the process.
+
+    .. ipython:: python
+        :suppress:
+
+        import verticapy as vp
+        import verticapy.datasets as vpd
+        data = vpd.load_iris()
+        train, test = data.train_test_split(test_size = 0.2)
+
+    Model Initialization
+    ^^^^^^^^^^^^^^^^^^^^^
+
+    First we import the ``NearestCentroid`` model:
+
+    .. ipython:: python
+
+        from verticapy.machine_learning.vertica import NearestCentroid
+
+    Then we can create the model:
+
+    .. ipython:: python
+
+        model = NearestCentroid(p = 2)
+
+    .. hint::
+
+        In ``verticapy`` 1.0.x and higher, you do not need to specify the
+        model name, as the name is automatically assigned. If you need to
+        re-use the model, you can fetch the model name from the model's
+        attributes.
+
+    .. important::
+
+        The model name is crucial for the model management system and
+        versioning. It's highly recommended to provide a name if you
+        plan to reuse the model later.
+
+    Model Training
+    ^^^^^^^^^^^^^^^
+
+    We can now fit the model:
+
+    .. ipython:: python
+
+        model.fit(
+            train,
+            [
+                "SepalLengthCm",
+                "SepalWidthCm",
+                "PetalLengthCm",
+                "PetalWidthCm",
+            ],
+            "Species",
+            test,
+        )
+
+    .. important::
+
+        To train a model, you can directly use the ``vDataFrame`` or the
+        name of the relation stored in the database. The test set is optional
+        and is only used to compute the test metrics. In ``verticapy``, we
+        don't work using ``X`` matrices and ``y`` vectors. Instead, we work
+        directly with lists of predictors and the response name.
+
+    Metrics
+    ^^^^^^^^
+
+    We can get the entire report using:
+
+    .. ipython:: python
+        :suppress:
+
+        #result = model.report()
+        html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report.html", "w")
+        html_file.write(result._repr_html_())
+        html_file.close()
+
+    .. code-block:: python
+
+        model.report()
+
+    .. raw:: html
+        :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report.html
+
+    .. important::
+
+        Most metrics are computed using a single SQL query, but some of them might
+        require multiple SQL queries. Selecting only the necessary metrics in the
+        report can help optimize performance.
+        E.g. ``model.report(metrics = ["auc", "accuracy"])``.
+
+    For classification models, we can easily modify the ``cutoff`` to observe
+    the effect on different metrics:
+
+    .. ipython:: python
+        :suppress:
+
+        #result = model.report(cutoff = 0.2)
+        html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report_cutoff.html", "w")
+        html_file.write(result._repr_html_())
+        html_file.close()
+
+    .. code-block:: python
+
+        model.report(cutoff = 0.2)
+
+    .. raw:: html
+        :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report_cutoff.html
+
+
+    You can also use the ``NearestCentroid.score`` function to compute any
+    classification metric (accuracy by default), e.g. the macro-averaged F1:
+
+    .. ipython:: python
+
+        model.score(metric = "f1", average = "macro")
+
+    .. note::
+
+        For multi-class scoring, ``verticapy`` allows the
+        flexibility to use three averaging techniques:
+        micro, macro and weighted. Please refer to
+        `this link <https://towardsdatascience.com/micro-macro-weighted-averages-of-f1-score-clearly-explained-b603420b292f>`_
+        for more details on how they are calculated.
+
+    Prediction
+    ^^^^^^^^^^^
+
+    Prediction is straightforward:
+
+    .. ipython:: python
+        :suppress:
+
+        result = model.predict(
+            test,
+            [
+                "SepalLengthCm",
+                "SepalWidthCm",
+                "PetalLengthCm",
+                "PetalWidthCm",
+            ],
+            "prediction",
+        )
+        html_file = open("figures/machine_learning_vertica_cluster_nearest_centroid_prediction.html", "w")
+        html_file.write(result._repr_html_())
+        html_file.close()
+
+    .. code-block:: python
+
+        model.predict(
+            test,
+            [
+                "SepalLengthCm",
+                "SepalWidthCm",
+                "PetalLengthCm",
+                "PetalWidthCm",
+            ],
+            "prediction",
+        )
+
+    .. raw:: html
+        :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_prediction.html
+
+    .. note::
+
+        Predictions can be made automatically using the test set, in which
+        case you don't need to specify the predictors. Alternatively, you
+        can pass only the ``vDataFrame`` to the
+        :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.predict`
+        function, but in this case, it's essential that the column names of
+        the ``vDataFrame`` match the predictors and response name in the
+        model.
+
+    Probabilities
+    ^^^^^^^^^^^^^^
+
+    It is also easy to get the model's probabilities:
+
+    .. ipython:: python
+        :suppress:
+
+        result = model.predict_proba(
+            test,
+            [
+                "SepalLengthCm",
+                "SepalWidthCm",
+                "PetalLengthCm",
+                "PetalWidthCm",
+            ],
+            "prediction",
+        )
+        html_file = open("figures/machine_learning_vertica_cluster_nearest_centroid_proba.html", "w")
+        html_file.write(result._repr_html_())
+        html_file.close()
+
+    .. code-block:: python
+
+        model.predict_proba(
+            test,
+            [
+                "SepalLengthCm",
+                "SepalWidthCm",
+                "PetalLengthCm",
+                "PetalWidthCm",
+            ],
+            "prediction",
+        )
+
+    .. raw:: html
+        :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_proba.html
+
+    .. note::
+
+        Probabilities are added to the vDataFrame, and VerticaPy uses the
+        corresponding probability function in SQL behind the scenes. You
+        can use the ``pos_label`` parameter to add only the probability
+        of the selected category.
+
+    Confusion Matrix
+    ^^^^^^^^^^^^^^^^^
+
+    You can obtain the confusion matrix.
+
+    .. ipython:: python
+
+        model.confusion_matrix()
+
+    .. hint::
+
+        In the context of multi-class classification, you typically work
+        with an overall confusion matrix that summarizes the classification
+        efficiency across all classes. However, you have the flexibility to
+        specify a ``pos_label`` and adjust the cutoff threshold. In this case,
+        a binary confusion matrix is computed, where the chosen class is treated
+        as the positive class, allowing you to evaluate its efficiency as if it
+        were a binary classification problem.
+
+        **Specific confusion matrix:**
+
+        .. ipython:: python
+
+            model.confusion_matrix(pos_label = "Iris-setosa", cutoff = 0.6)
+
+    .. note::
+
+        In classification, the ``cutoff`` is a threshold value used to
+        determine class assignment based on predicted probabilities or
+        scores from a classification model. In binary classification,
+        if the predicted probability for a specific class is greater
+        than or equal to the cutoff, the instance is assigned to the
+        positive class; otherwise, it is assigned to the negative class.
+        Adjusting the cutoff allows for trade-offs between true positives
+        and false positives, enabling the model to be optimized for
+        specific objectives or to consider the relative costs of different
+        classification errors. The choice of cutoff is critical for
+        tailoring the model's performance to meet specific needs.
+
+    Main Plots (Classification Curves)
+    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+    Classification models allow for the creation of various plots that
+    are very helpful in understanding the model, such as the ROC Curve,
+    PRC Curve, Cutoff Curve, Gain Curve, and more.
+
+    Most of the classification curves can be found in the
+    :ref:`chart_gallery.classification_curve`.
+
+    For example, let's draw the model's ROC curve.
+
+    .. code-block:: python
+
+        model.roc_curve(pos_label = "Iris-setosa")
+
+    .. ipython:: python
+        :suppress:
+
+        vp.set_option("plotting_lib", "plotly")
+        fig = model.roc_curve(pos_label = "Iris-setosa")
+        fig.write_html("figures/machine_learning_vertica_cluster_nearest_centroid_roc.html")
+
+    .. raw:: html
+        :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_roc.html
+
+    .. important::
+
+        Most of the curves have a parameter called ``nbins``, which is essential
+        for estimating metrics. The larger the ``nbins``, the more precise the
+        estimation, but it can significantly impact performance. Exercise caution
+        when increasing this parameter excessively.
+
+    .. hint::
+
+        In binary classification, various curves can be easily plotted. However,
+        in multi-class classification, it's important to select the ``pos_label``,
+        representing the class to be treated as positive when drawing the curve.
+
+    Other Plots
+    ^^^^^^^^^^^^
+
+    **Contour plot** is another useful plot that can be produced
+    for models with two predictors.
+
+    .. code-block:: python
+
+        model.contour(pos_label = "Iris-setosa")
+
+    .. important::
+
+        Machine learning models with two predictors can usually
+        benefit from their own contour plot. This visual representation
+        aids in exploring predictions and gaining a deeper understanding
+        of how these models perform in different scenarios.
+        Please refer to :ref:`chart_gallery.contour` for more examples.
+
+    Parameter Modification
+    ^^^^^^^^^^^^^^^^^^^^^^^
+
+    In order to see the parameters:
+
+    .. ipython:: python
+
+        model.get_params()
+
+    And to manually change some of the parameters:
+
+    .. ipython:: python
+
+        model.set_params({'p': 3})
+
+    Model Register
+    ^^^^^^^^^^^^^^
+
+    In order to register the model for tracking and versioning:
+
+    .. code-block:: python
+
+        model.register("model_v1")
+
+    Please refer to :ref:`notebooks/ml/model_tracking_versioning/index.html`
+    for more details on model tracking and versioning.
+
+    Model Exporting
+    ^^^^^^^^^^^^^^^^
+
+    **To Memmodel**
+
+    .. code-block:: python
+
+        model.to_memmodel()
+
+    .. note::
+
+        ``MemModel`` objects serve as in-memory representations of machine
+        learning models. They can be used for both in-database and in-memory
+        prediction tasks. These objects can be pickled in the same way that
+        you would pickle a ``scikit-learn`` model.
+
+    The following methods for exporting the model use ``MemModel``, and it
+    is recommended to use ``MemModel`` directly.
+
+    **To SQL**
+
+    You can get the SQL code by:
+
+    .. ipython:: python
+
+        model.to_sql()
+
+    **To Python**
+
+    To obtain the prediction function in Python syntax, use the following code:
+
+    .. ipython:: python
+
+        X = [[5, 2, 3, 1]]
+        #model.to_python()(X)
+
+    .. hint::
+
+        The
+        :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.to_python`
+        method is used to retrieve predictions,
+        probabilities, or cluster distances. For specific details on how to
+        use this method for different model types, refer to the relevant
+        documentation for each model.
     """
 
     # Properties.

From 0e5c0c096666dd8f1bead8540abf23257ebae55d Mon Sep 17 00:00:00 2001
From: Badr <badr.ouali@outlook.fr>
Date: Sat, 28 Oct 2023 23:33:37 -0400
Subject: [PATCH 2/4] correcting bugs and doc - Nearest Centroids

---
 .../machine_learning/memmodel/cluster.py      | 10 ++--
 .../metrics/classification.py                 |  2 +-
 verticapy/machine_learning/vertica/base.py    |  2 +-
 verticapy/machine_learning/vertica/cluster.py | 57 +++++++++----------
 4 files changed, 34 insertions(+), 37 deletions(-)

diff --git a/verticapy/machine_learning/memmodel/cluster.py b/verticapy/machine_learning/memmodel/cluster.py
index 79cf4d115..68f59aa8f 100755
--- a/verticapy/machine_learning/memmodel/cluster.py
+++ b/verticapy/machine_learning/memmodel/cluster.py
@@ -69,7 +69,7 @@ def __init__(
         clusters_names: Optional[ArrayLike] = None,
     ) -> None:
         clusters_names = format_type(clusters_names, dtype=list)
-        self.clusters_ = np.array(clusters)
+        self.clusters_ = np.array(clusters).astype(float)
         self.classes_ = np.array(clusters_names)
         self.p_ = p
 
@@ -380,7 +380,7 @@ def object_type(self) -> Literal["KMeans"]:
     # System & Special Methods.
 
     def __init__(self, clusters: ArrayLike, p: int = 2) -> None:
-        self.clusters_ = np.array(clusters)
+        self.clusters_ = np.array(clusters).astype(float)
         self.p_ = p
 
 
@@ -518,7 +518,7 @@ def __init__(
         classes: ArrayLike,
         p: int = 2,
     ) -> None:
-        self.clusters_ = np.array(clusters)
+        self.clusters_ = np.array(clusters).astype(float)
         self.classes_ = np.array(classes)
         self.p_ = p
 
@@ -720,7 +720,7 @@ def __init__(
         cluster_size, cluster_score = format_type(
             cluster_size, cluster_score, dtype=list
         )
-        self.clusters_ = np.array(clusters)
+        self.clusters_ = np.array(clusters).astype(float)
         self.children_left_ = np.array(children_left)
         self.children_right_ = np.array(children_right)
         self.cluster_size_ = np.array(cluster_size)
@@ -1100,7 +1100,7 @@ def __init__(
         is_categorical: Optional[ArrayLike] = None,
     ) -> None:
         is_categorical = format_type(is_categorical, dtype=list)
-        self.clusters_ = np.array(clusters)
+        self.clusters_ = np.array(clusters).astype(float)
         self.p_ = p
         self.gamma_ = gamma
         self.is_categorical_ = np.array(is_categorical)
diff --git a/verticapy/machine_learning/metrics/classification.py b/verticapy/machine_learning/metrics/classification.py
index e8125fd67..2171defcd 100755
--- a/verticapy/machine_learning/metrics/classification.py
+++ b/verticapy/machine_learning/metrics/classification.py
@@ -229,7 +229,7 @@ def _compute_final_score(
             "Parameter 'pos_label' can only be used when parameter 'average' is set to 'binary' or undefined."
         )
     if not (isinstance(pos_label, NoneType)) and not (isinstance(labels, NoneType)):
-        raise ValueError("Parameters 'pos_label' and 'labels' can not be both defined.")
+        labels = None
     if (
         isinstance(pos_label, NoneType)
         and isinstance(labels, NoneType)
diff --git a/verticapy/machine_learning/vertica/base.py b/verticapy/machine_learning/vertica/base.py
index 77ebffc9f..9f67ef452 100755
--- a/verticapy/machine_learning/vertica/base.py
+++ b/verticapy/machine_learning/vertica/base.py
@@ -2215,7 +2215,7 @@ def score(
         kwargs = {}
         if metric not in ("aic", "bic"):
             labels = None
-            if isinstance(pos_label, NoneType):
+            if isinstance(pos_label, NoneType) or not (self._is_native):
                 labels = self.classes_
             kwargs = {
                 "average": average,
diff --git a/verticapy/machine_learning/vertica/cluster.py b/verticapy/machine_learning/vertica/cluster.py
index 482873d5c..5cafd6c07 100755
--- a/verticapy/machine_learning/vertica/cluster.py
+++ b/verticapy/machine_learning/vertica/cluster.py
@@ -615,7 +615,7 @@ def _compute_attributes(self) -> None:
         Computes the model's attributes.
         """
         centers = self.get_vertica_attributes("centers")
-        self.clusters_ = centers.to_numpy()
+        self.clusters_ = centers.to_numpy().astype(float)
         self.p_ = 2
         self._compute_metrics()
 
@@ -1128,7 +1128,7 @@ def _compute_attributes(self) -> None:
         Computes the model's attributes.
         """
         centers = self.get_vertica_attributes("centers")
-        self.clusters_ = centers.to_numpy()
+        self.clusters_ = centers.to_numpy().astype(float)
         self.p_ = 2
         self.gamma_ = self.parameters["gamma"]
         dtypes = centers.dtype
@@ -1648,7 +1648,7 @@ def _compute_attributes(self) -> None:
         """
         centers = self.get_vertica_attributes("BKTree")
         self.tree_ = copy.deepcopy(centers)
-        self.clusters_ = centers.to_numpy()[:, 1 : len(self.X) + 1]
+        self.clusters_ = centers.to_numpy()[:, 1 : len(self.X) + 1].astype(float)
         self.children_left_ = np.array(centers["left_child"])
         self.children_right_ = np.array(centers["right_child"])
         self.cluster_size_ = np.array(centers["cluster_size"])
@@ -2146,6 +2146,15 @@ class NearestCentroid(MulticlassClassifier):
     This object uses pure SQL to compute the distances and
     final score.
 
+    .. important::
+
+        This algorithm is not Vertica Native and relies solely
+        on SQL for attribute computation. While this model does
+        not take advantage of the benefits provided by a model
+        management system, including versioning and tracking,
+        the SQL code it generates can still be used to create a
+        pipeline.
+
     Parameters
     ----------
     p: int, optional
@@ -2243,19 +2252,6 @@ class NearestCentroid(MulticlassClassifier):
 
         model = NearestCentroid(p = 2)
 
-    .. hint::
-
-        In ``verticapy`` 1.0.x and higher, you do not need to specify the
-        model name, as the name is automatically assigned. If you need to
-        re-use the model, you can fetch the model name from the model's
-        attributes.
-
-    .. important::
-
-        The model name is crucial for the model management system and
-        versioning. It's highly recommended to provide a name if you
-        plan to reuse the model later.
-
     Model Training
     ^^^^^^^^^^^^^^^
 
@@ -2283,6 +2279,12 @@ class NearestCentroid(MulticlassClassifier):
         don't work using ``X`` matrices and ``y`` vectors. Instead, we work
         directly with lists of predictors and the response name.
 
+    .. important::
+
+        As this model is not native, it solely relies on SQL statements to
+        compute various attributes, storing them within the object. No data
+        is saved in the database.
+
     Metrics
     ^^^^^^^^
 
@@ -2291,7 +2293,7 @@ class NearestCentroid(MulticlassClassifier):
     .. ipython:: python
         :suppress:
 
-        #result = model.report()
+        result = model.report()
         html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report.html", "w")
         html_file.write(result._repr_html_())
         html_file.close()
@@ -2316,7 +2318,7 @@ class NearestCentroid(MulticlassClassifier):
     .. ipython:: python
         :suppress:
 
-        #result = model.report(cutoff = 0.2)
+        result = model.report(cutoff = 0.2)
         html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report_cutoff.html", "w")
         html_file.write(result._repr_html_())
         html_file.close()
@@ -2387,7 +2389,7 @@ class NearestCentroid(MulticlassClassifier):
         Predictions can be made automatically using the test set, in which
         case you don't need to specify the predictors. Alternatively, you
         can pass only the ``vDataFrame`` to the
-        :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.predict`
+        :py:mod:`verticapy.machine_learning.vertica.cluster.NearestCentroid.predict`
         function, but in this case, it's essential that the column names of
         the ``vDataFrame`` match the predictors and response name in the
         model.
@@ -2551,14 +2553,9 @@ class NearestCentroid(MulticlassClassifier):
     Model Register
     ^^^^^^^^^^^^^^
 
-    In order to register the model for tracking and versioning:
-
-    .. code-block:: python
-
-        model.register("model_v1")
-
-    Please refer to :ref:`notebooks/ml/model_tracking_versioning/index.html`
-    for more details on model tracking and versioning.
+    As this model is not native, it does not support model management and
+    versioning. However, it is possible to use the SQL code it generates
+    for deployment.
 
     Model Exporting
     ^^^^^^^^^^^^^^^^
@@ -2594,12 +2591,12 @@ class NearestCentroid(MulticlassClassifier):
     .. ipython:: python
 
         X = [[5, 2, 3, 1]]
-        #model.to_python()(X)
+        model.to_python()(X)
 
     .. hint::
 
         The
-        :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.to_python`
+        :py:mod:`verticapy.machine_learning.vertica.cluster.NearestCentroid.to_python`
         method is used to retrieve predictions,
         probabilities, or cluster distances. For specific details on how to
         use this method for different model types, refer to the relevant
@@ -2664,7 +2661,7 @@ def _compute_attributes(self) -> None:
             ORDER BY {self.y} ASC""",
             title="Getting Model Centroids.",
         )
-        self.clusters_ = centroids.to_numpy()[:, 0:-1]
+        self.clusters_ = centroids.to_numpy()[:, 0:-1].astype(float)
         self.classes_ = self._array_to_int(centroids.to_numpy()[:, -1])
         self.p_ = self.parameters["p"]
 

From 9d94a502ecd9fe66bc254e1efd691341793c1816 Mon Sep 17 00:00:00 2001
From: Badr <badr.ouali@outlook.fr>
Date: Sun, 29 Oct 2023 08:41:45 -0400
Subject: [PATCH 3/4] Update cluster.py

---
 verticapy/machine_learning/vertica/cluster.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/verticapy/machine_learning/vertica/cluster.py b/verticapy/machine_learning/vertica/cluster.py
index 5cafd6c07..8871f7fea 100755
--- a/verticapy/machine_learning/vertica/cluster.py
+++ b/verticapy/machine_learning/vertica/cluster.py
@@ -615,7 +615,7 @@ def _compute_attributes(self) -> None:
         Computes the model's attributes.
         """
         centers = self.get_vertica_attributes("centers")
-        self.clusters_ = centers.to_numpy().astype(float)
+        self.clusters_ = centers.to_numpy()
         self.p_ = 2
         self._compute_metrics()
 
@@ -1128,7 +1128,7 @@ def _compute_attributes(self) -> None:
         Computes the model's attributes.
         """
         centers = self.get_vertica_attributes("centers")
-        self.clusters_ = centers.to_numpy().astype(float)
+        self.clusters_ = centers.to_numpy()
         self.p_ = 2
         self.gamma_ = self.parameters["gamma"]
         dtypes = centers.dtype

From 3990c73f243b61279edb52ad4cd350bdc9d9fd6c Mon Sep 17 00:00:00 2001
From: Badr <badr.ouali@outlook.fr>
Date: Sun, 29 Oct 2023 12:21:50 -0400
Subject: [PATCH 4/4] Update cluster.py

---
 verticapy/machine_learning/memmodel/cluster.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/verticapy/machine_learning/memmodel/cluster.py b/verticapy/machine_learning/memmodel/cluster.py
index 68f59aa8f..c96098d69 100755
--- a/verticapy/machine_learning/memmodel/cluster.py
+++ b/verticapy/machine_learning/memmodel/cluster.py
@@ -1100,7 +1100,7 @@ def __init__(
         is_categorical: Optional[ArrayLike] = None,
     ) -> None:
         is_categorical = format_type(is_categorical, dtype=list)
-        self.clusters_ = np.array(clusters).astype(float)
+        self.clusters_ = np.array(clusters)
         self.p_ = p
         self.gamma_ = gamma
         self.is_categorical_ = np.array(is_categorical)