From c0c1131d66c13a5a9091aa4502f647e4820d3c03 Mon Sep 17 00:00:00 2001
From: umar <46414488+mail4umar@users.noreply.github.com>
Date: Fri, 27 Oct 2023 12:11:49 -0500
Subject: [PATCH 1/4] Update cluster.py
---
verticapy/machine_learning/vertica/cluster.py | 453 ++++++++++++++++++
1 file changed, 453 insertions(+)
diff --git a/verticapy/machine_learning/vertica/cluster.py b/verticapy/machine_learning/vertica/cluster.py
index 3192c292b..482873d5c 100755
--- a/verticapy/machine_learning/vertica/cluster.py
+++ b/verticapy/machine_learning/vertica/cluster.py
@@ -2151,6 +2151,459 @@ class NearestCentroid(MulticlassClassifier):
p: int, optional
The p corresponding to the one of the p-distances
(distance metric used to compute the model).
+
+ Examples
+ ---------
+
+ The following examples provide a basic understanding of usage.
+ For more detailed examples, please refer to the
+ :ref:`user_guide.machine_learning` or the
+ `Examples <https://www.vertica.com/python/examples/>`_
+ section on the website.
+
+ Load data for machine learning
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ We import ``verticapy``:
+
+ .. code-block:: python
+
+ import verticapy as vp
+
+ .. hint::
+
+ By assigning an alias to ``verticapy``, we mitigate the risk of code
+ collisions with other libraries. This precaution is necessary
+ because verticapy uses commonly known function names like "average"
+ and "median", which can potentially lead to naming conflicts.
+ The use of an alias ensures that the functions from verticapy are
+ used as intended without interfering with functions from other
+ libraries.
+
+ For this example, we will use the iris dataset.
+
+ .. code-block:: python
+
+ import verticapy.datasets as vpd
+
+ data = vpd.load_iris()
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/datasets_loaders_load_iris.html
+
+ .. note::
+
+ VerticaPy offers a wide range of sample datasets that are
+ ideal for training and testing purposes. You can explore
+ the full list of available datasets in the :ref:`api.datasets`,
+ which provides detailed information on each dataset
+ and how to use them effectively. These datasets are invaluable
+ resources for honing your data analysis and machine learning
+ skills within the VerticaPy environment.
+
+ You can easily divide your dataset into training and testing subsets
+ using the :py:mod:`vDataFrame.train_test_split` method. This is a
+ crucial step when preparing your data for machine learning, as it
+ allows you to evaluate the performance of your models accurately.
+
+ .. code-block:: python
+
+ data = vpd.load_iris()
+ train, test = data.train_test_split(test_size = 0.2)
+
+ .. warning::
+
+ In this case, VerticaPy utilizes seeded randomization to guarantee
+ the reproducibility of your data split. However, please be aware
+ that this approach may lead to reduced performance. For a more
+ efficient data split, you can use the :py:mod:`vDataFrame.to_db`
+ method to save your results into ``tables`` or ``temporary tables``.
+ This will help enhance the overall performance of the process.
+
+ .. ipython:: python
+ :suppress:
+
+ import verticapy as vp
+ import verticapy.datasets as vpd
+ data = vpd.load_iris()
+ train, test = data.train_test_split(test_size = 0.2)
+
+ Model Initialization
+ ^^^^^^^^^^^^^^^^^^^^^
+
+ First we import the ``NearestCentroid`` model:
+
+ .. ipython:: python
+
+ from verticapy.machine_learning.vertica import NearestCentroid
+
+ Then we can create the model:
+
+ .. ipython:: python
+
+ model = NearestCentroid(p = 2)
+
+ .. hint::
+
+ In ``verticapy`` 1.0.x and higher, you do not need to specify the
+ model name, as the name is automatically assigned. If you need to
+ re-use the model, you can fetch the model name from the model's
+ attributes.
+
+ .. important::
+
+ The model name is crucial for the model management system and
+ versioning. It's highly recommended to provide a name if you
+ plan to reuse the model later.
+
+ Model Training
+ ^^^^^^^^^^^^^^^
+
+ We can now fit the model:
+
+ .. ipython:: python
+
+ model.fit(
+ train,
+ [
+ "SepalLengthCm",
+ "SepalWidthCm",
+ "PetalLengthCm",
+ "PetalWidthCm",
+ ],
+ "Species",
+ test,
+ )
+
+ .. important::
+
+ To train a model, you can directly use the ``vDataFrame`` or the
+ name of the relation stored in the database. The test set is optional
+ and is only used to compute the test metrics. In ``verticapy``, we
+ don't work using ``X`` matrices and ``y`` vectors. Instead, we work
+ directly with lists of predictors and the response name.
+
+ Metrics
+ ^^^^^^^^
+
+ We can get the entire report using:
+
+ .. ipython:: python
+ :suppress:
+
+ #result = model.report()
+ html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report.html", "w")
+ html_file.write(result._repr_html_())
+ html_file.close()
+
+ .. code-block:: python
+
+ model.report()
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report.html
+
+ .. important::
+
+ Most metrics are computed using a single SQL query, but some of them might
+ require multiple SQL queries. Selecting only the necessary metrics in the
+ report can help optimize performance.
+ E.g. ``model.report(metrics = ["auc", "accuracy"])``.
+
+ For classification models, we can easily modify the ``cutoff`` to observe
+ the effect on different metrics:
+
+ .. ipython:: python
+ :suppress:
+
+ #result = model.report(cutoff = 0.2)
+ html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report_cutoff.html", "w")
+ html_file.write(result._repr_html_())
+ html_file.close()
+
+ .. code-block:: python
+
+ model.report(cutoff = 0.2)
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report_cutoff.html
+
+
+ You can also use the ``NearestCentroid.score`` function to compute any
+ classification metric. The default metric is the accuracy:
+
+ .. ipython:: python
+
+ model.score(metric = "f1", average = "macro")
+
+ .. note::
+
+ For multi-class scoring, ``verticapy`` allows the
+ flexibility to use three averaging techniques:
+ micro, macro and weighted. Please refer to
+ `this link <https://scikit-learn.org/stable/modules/generated/sklearn.metrics.f1_score.html>`_
+ for more details on how they are calculated.
+
+ Prediction
+ ^^^^^^^^^^^
+
+ Prediction is straightforward:
+
+ .. ipython:: python
+ :suppress:
+
+ result = model.predict(
+ test,
+ [
+ "SepalLengthCm",
+ "SepalWidthCm",
+ "PetalLengthCm",
+ "PetalWidthCm",
+ ],
+ "prediction",
+ )
+ html_file = open("figures/machine_learning_vertica_cluster_nearest_centroid_prediction.html", "w")
+ html_file.write(result._repr_html_())
+ html_file.close()
+
+ .. code-block:: python
+
+ model.predict(
+ test,
+ [
+ "SepalLengthCm",
+ "SepalWidthCm",
+ "PetalLengthCm",
+ "PetalWidthCm",
+ ],
+ "prediction",
+ )
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_prediction.html
+
+ .. note::
+
+ Predictions can be made automatically using the test set, in which
+ case you don't need to specify the predictors. Alternatively, you
+ can pass only the ``vDataFrame`` to the
+ :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.predict`
+ function, but in this case, it's essential that the column names of
+ the ``vDataFrame`` match the predictors and response name in the
+ model.
+
+ Probabilities
+ ^^^^^^^^^^^^^^
+
+ It is also easy to get the model's probabilities:
+
+ .. ipython:: python
+ :suppress:
+
+ result = model.predict_proba(
+ test,
+ [
+ "SepalLengthCm",
+ "SepalWidthCm",
+ "PetalLengthCm",
+ "PetalWidthCm",
+ ],
+ "prediction",
+ )
+ html_file = open("figures/machine_learning_vertica_cluster_nearest_centroid_proba.html", "w")
+ html_file.write(result._repr_html_())
+ html_file.close()
+
+ .. code-block:: python
+
+ model.predict_proba(
+ test,
+ [
+ "SepalLengthCm",
+ "SepalWidthCm",
+ "PetalLengthCm",
+ "PetalWidthCm",
+ ],
+ "prediction",
+ )
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_proba.html
+
+ .. note::
+
+ Probabilities are added to the vDataFrame, and VerticaPy uses the
+ corresponding probability function in SQL behind the scenes. You
+ can use the ``pos_label`` parameter to add only the probability
+ of the selected category.
+
+ Confusion Matrix
+ ^^^^^^^^^^^^^^^^^
+
+ You can obtain the confusion matrix.
+
+ .. ipython:: python
+
+ model.confusion_matrix()
+
+ .. hint::
+
+ In the context of multi-class classification, you typically work
+ with an overall confusion matrix that summarizes the classification
+ efficiency across all classes. However, you have the flexibility to
+ specify a ``pos_label`` and adjust the cutoff threshold. In this case,
+ a binary confusion matrix is computed, where the chosen class is treated
+ as the positive class, allowing you to evaluate its efficiency as if it
+ were a binary classification problem.
+
+ **Specific confusion matrix:**
+
+ .. ipython:: python
+
+ model.confusion_matrix(pos_label = "Iris-setosa", cutoff = 0.6)
+
+ .. note::
+
+ In classification, the ``cutoff`` is a threshold value used to
+ determine class assignment based on predicted probabilities or
+ scores from a classification model. In binary classification,
+ if the predicted probability for a specific class is greater
+ than or equal to the cutoff, the instance is assigned to the
+ positive class; otherwise, it is assigned to the negative class.
+ Adjusting the cutoff allows for trade-offs between true positives
+ and false positives, enabling the model to be optimized for
+ specific objectives or to consider the relative costs of different
+ classification errors. The choice of cutoff is critical for
+ tailoring the model's performance to meet specific needs.
+
+ Main Plots (Classification Curves)
+ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+ Classification models allow for the creation of various plots that
+ are very helpful in understanding the model, such as the ROC Curve,
+ PRC Curve, Cutoff Curve, Gain Curve, and more.
+
+ Most of the classification curves can be found in the
+ :ref:`chart_gallery.classification_curve`.
+
+ For example, let's draw the model's ROC curve.
+
+ .. code-block:: python
+
+ model.roc_curve(pos_label = "Iris-setosa")
+
+ .. ipython:: python
+ :suppress:
+
+ vp.set_option("plotting_lib", "plotly")
+ fig = model.roc_curve(pos_label = "Iris-setosa")
+ fig.write_html("figures/machine_learning_vertica_cluster_nearest_centroid_roc.html")
+
+ .. raw:: html
+ :file: SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_roc.html
+
+ .. important::
+
+ Most of the curves have a parameter called ``nbins``, which is essential
+ for estimating metrics. The larger the ``nbins``, the more precise the
+ estimation, but it can significantly impact performance. Exercise caution
+ when increasing this parameter excessively.
+
+ .. hint::
+
+ In binary classification, various curves can be easily plotted. However,
+ in multi-class classification, it's important to select the ``pos_label``,
+ representing the class to be treated as positive when drawing the curve.
+
+ Other Plots
+ ^^^^^^^^^^^^
+
+ **Contour plot** is another useful plot that can be produced
+ for models with two predictors.
+
+ .. code-block:: python
+
+ model.contour(pos_label = "Iris-setosa")
+
+ .. important::
+
+ Machine learning models with two predictors can usually
+ benefit from their own contour plot. This visual representation
+ aids in exploring predictions and gaining a deeper understanding
+ of how these models perform in different scenarios.
+ Please refer to :ref:`chart_gallery.contour` for more examples.
+
+ Parameter Modification
+ ^^^^^^^^^^^^^^^^^^^^^^^
+
+ In order to see the parameters:
+
+ .. ipython:: python
+
+ model.get_params()
+
+ And to manually change some of the parameters:
+
+ .. ipython:: python
+
+ model.set_params({'p': 3})
+
+ Model Register
+ ^^^^^^^^^^^^^^
+
+ In order to register the model for tracking and versioning:
+
+ .. code-block:: python
+
+ model.register("model_v1")
+
+ Please refer to :ref:`notebooks/ml/model_tracking_versioning/index.html`
+ for more details on model tracking and versioning.
+
+ Model Exporting
+ ^^^^^^^^^^^^^^^^
+
+ **To Memmodel**
+
+ .. code-block:: python
+
+ model.to_memmodel()
+
+ .. note::
+
+ ``MemModel`` objects serve as in-memory representations of machine
+ learning models. They can be used for both in-database and in-memory
+ prediction tasks. These objects can be pickled in the same way that
+ you would pickle a ``scikit-learn`` model.
+
+ The following methods for exporting the model use ``MemModel``, and it
+ is recommended to use ``MemModel`` directly.
+
+ **To SQL**
+
+ You can get the SQL code by:
+
+ .. ipython:: python
+
+ model.to_sql()
+
+ **To Python**
+
+ To obtain the prediction function in Python syntax, use the following code:
+
+ .. ipython:: python
+
+ X = [[5, 2, 3, 1]]
+ #model.to_python()(X)
+
+ .. hint::
+
+ The
+ :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.to_python`
+ method is used to retrieve predictions,
+ probabilities, or cluster distances. For specific details on how to
+ use this method for different model types, refer to the relevant
+ documentation for each model.
"""
# Properties.
From 0e5c0c096666dd8f1bead8540abf23257ebae55d Mon Sep 17 00:00:00 2001
From: Badr
Date: Sat, 28 Oct 2023 23:33:37 -0400
Subject: [PATCH 2/4] correcting bugs and doc - Nearest Centroids
---
.../machine_learning/memmodel/cluster.py | 10 ++--
.../metrics/classification.py | 2 +-
verticapy/machine_learning/vertica/base.py | 2 +-
verticapy/machine_learning/vertica/cluster.py | 57 +++++++++----------
4 files changed, 34 insertions(+), 37 deletions(-)
diff --git a/verticapy/machine_learning/memmodel/cluster.py b/verticapy/machine_learning/memmodel/cluster.py
index 79cf4d115..68f59aa8f 100755
--- a/verticapy/machine_learning/memmodel/cluster.py
+++ b/verticapy/machine_learning/memmodel/cluster.py
@@ -69,7 +69,7 @@ def __init__(
clusters_names: Optional[ArrayLike] = None,
) -> None:
clusters_names = format_type(clusters_names, dtype=list)
- self.clusters_ = np.array(clusters)
+ self.clusters_ = np.array(clusters).astype(float)
self.classes_ = np.array(clusters_names)
self.p_ = p
@@ -380,7 +380,7 @@ def object_type(self) -> Literal["KMeans"]:
# System & Special Methods.
def __init__(self, clusters: ArrayLike, p: int = 2) -> None:
- self.clusters_ = np.array(clusters)
+ self.clusters_ = np.array(clusters).astype(float)
self.p_ = p
@@ -518,7 +518,7 @@ def __init__(
classes: ArrayLike,
p: int = 2,
) -> None:
- self.clusters_ = np.array(clusters)
+ self.clusters_ = np.array(clusters).astype(float)
self.classes_ = np.array(classes)
self.p_ = p
@@ -720,7 +720,7 @@ def __init__(
cluster_size, cluster_score = format_type(
cluster_size, cluster_score, dtype=list
)
- self.clusters_ = np.array(clusters)
+ self.clusters_ = np.array(clusters).astype(float)
self.children_left_ = np.array(children_left)
self.children_right_ = np.array(children_right)
self.cluster_size_ = np.array(cluster_size)
@@ -1100,7 +1100,7 @@ def __init__(
is_categorical: Optional[ArrayLike] = None,
) -> None:
is_categorical = format_type(is_categorical, dtype=list)
- self.clusters_ = np.array(clusters)
+ self.clusters_ = np.array(clusters).astype(float)
self.p_ = p
self.gamma_ = gamma
self.is_categorical_ = np.array(is_categorical)
diff --git a/verticapy/machine_learning/metrics/classification.py b/verticapy/machine_learning/metrics/classification.py
index e8125fd67..2171defcd 100755
--- a/verticapy/machine_learning/metrics/classification.py
+++ b/verticapy/machine_learning/metrics/classification.py
@@ -229,7 +229,7 @@ def _compute_final_score(
"Parameter 'pos_label' can only be used when parameter 'average' is set to 'binary' or undefined."
)
if not (isinstance(pos_label, NoneType)) and not (isinstance(labels, NoneType)):
- raise ValueError("Parameters 'pos_label' and 'labels' can not be both defined.")
+ labels = None
if (
isinstance(pos_label, NoneType)
and isinstance(labels, NoneType)
diff --git a/verticapy/machine_learning/vertica/base.py b/verticapy/machine_learning/vertica/base.py
index 77ebffc9f..9f67ef452 100755
--- a/verticapy/machine_learning/vertica/base.py
+++ b/verticapy/machine_learning/vertica/base.py
@@ -2215,7 +2215,7 @@ def score(
kwargs = {}
if metric not in ("aic", "bic"):
labels = None
- if isinstance(pos_label, NoneType):
+ if isinstance(pos_label, NoneType) or not (self._is_native):
labels = self.classes_
kwargs = {
"average": average,
diff --git a/verticapy/machine_learning/vertica/cluster.py b/verticapy/machine_learning/vertica/cluster.py
index 482873d5c..5cafd6c07 100755
--- a/verticapy/machine_learning/vertica/cluster.py
+++ b/verticapy/machine_learning/vertica/cluster.py
@@ -615,7 +615,7 @@ def _compute_attributes(self) -> None:
Computes the model's attributes.
"""
centers = self.get_vertica_attributes("centers")
- self.clusters_ = centers.to_numpy()
+ self.clusters_ = centers.to_numpy().astype(float)
self.p_ = 2
self._compute_metrics()
@@ -1128,7 +1128,7 @@ def _compute_attributes(self) -> None:
Computes the model's attributes.
"""
centers = self.get_vertica_attributes("centers")
- self.clusters_ = centers.to_numpy()
+ self.clusters_ = centers.to_numpy().astype(float)
self.p_ = 2
self.gamma_ = self.parameters["gamma"]
dtypes = centers.dtype
@@ -1648,7 +1648,7 @@ def _compute_attributes(self) -> None:
"""
centers = self.get_vertica_attributes("BKTree")
self.tree_ = copy.deepcopy(centers)
- self.clusters_ = centers.to_numpy()[:, 1 : len(self.X) + 1]
+ self.clusters_ = centers.to_numpy()[:, 1 : len(self.X) + 1].astype(float)
self.children_left_ = np.array(centers["left_child"])
self.children_right_ = np.array(centers["right_child"])
self.cluster_size_ = np.array(centers["cluster_size"])
@@ -2146,6 +2146,15 @@ class NearestCentroid(MulticlassClassifier):
This object uses pure SQL to compute the distances and
final score.
+ .. important::
+
+ This algorithm is not Vertica Native and relies solely
+ on SQL for attribute computation. While this model does
+ not take advantage of the benefits provided by a model
+ management system, including versioning and tracking,
+ the SQL code it generates can still be used to create a
+ pipeline.
+
Parameters
----------
p: int, optional
@@ -2243,19 +2252,6 @@ class NearestCentroid(MulticlassClassifier):
model = NearestCentroid(p = 2)
- .. hint::
-
- In ``verticapy`` 1.0.x and higher, you do not need to specify the
- model name, as the name is automatically assigned. If you need to
- re-use the model, you can fetch the model name from the model's
- attributes.
-
- .. important::
-
- The model name is crucial for the model management system and
- versioning. It's highly recommended to provide a name if you
- plan to reuse the model later.
-
Model Training
^^^^^^^^^^^^^^^
@@ -2283,6 +2279,12 @@ class NearestCentroid(MulticlassClassifier):
don't work using ``X`` matrices and ``y`` vectors. Instead, we work
directly with lists of predictors and the response name.
+ .. important::
+
+ As this model is not native, it solely relies on SQL statements to
+ compute various attributes, storing them within the object. No data
+ is saved in the database.
+
Metrics
^^^^^^^^
@@ -2291,7 +2293,7 @@ class NearestCentroid(MulticlassClassifier):
.. ipython:: python
:suppress:
- #result = model.report()
+ result = model.report()
html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report.html", "w")
html_file.write(result._repr_html_())
html_file.close()
@@ -2316,7 +2318,7 @@ class NearestCentroid(MulticlassClassifier):
.. ipython:: python
:suppress:
- #result = model.report(cutoff = 0.2)
+ result = model.report(cutoff = 0.2)
html_file = open("SPHINX_DIRECTORY/figures/machine_learning_vertica_cluster_nearest_centroid_report_cutoff.html", "w")
html_file.write(result._repr_html_())
html_file.close()
@@ -2387,7 +2389,7 @@ class NearestCentroid(MulticlassClassifier):
Predictions can be made automatically using the test set, in which
case you don't need to specify the predictors. Alternatively, you
can pass only the ``vDataFrame`` to the
- :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.predict`
+ :py:mod:`verticapy.machine_learning.vertica.cluster.NearestCentroid.predict`
function, but in this case, it's essential that the column names of
the ``vDataFrame`` match the predictors and response name in the
model.
@@ -2551,14 +2553,9 @@ class NearestCentroid(MulticlassClassifier):
Model Register
^^^^^^^^^^^^^^
- In order to register the model for tracking and versioning:
-
- .. code-block:: python
-
- model.register("model_v1")
-
- Please refer to :ref:`notebooks/ml/model_tracking_versioning/index.html`
- for more details on model tracking and versioning.
+ As this model is not native, it does not support model management and
+ versioning. However, it is possible to use the SQL code it generates
+ for deployment.
Model Exporting
^^^^^^^^^^^^^^^^
@@ -2594,12 +2591,12 @@ class NearestCentroid(MulticlassClassifier):
.. ipython:: python
X = [[5, 2, 3, 1]]
- #model.to_python()(X)
+ model.to_python()(X)
.. hint::
The
- :py:mod:`verticapy.machine_learning.vertica.naive_bayes.NearestCentroid.to_python`
+ :py:mod:`verticapy.machine_learning.vertica.cluster.NearestCentroid.to_python`
method is used to retrieve predictions,
probabilities, or cluster distances. For specific details on how to
use this method for different model types, refer to the relevant
@@ -2664,7 +2661,7 @@ def _compute_attributes(self) -> None:
ORDER BY {self.y} ASC""",
title="Getting Model Centroids.",
)
- self.clusters_ = centroids.to_numpy()[:, 0:-1]
+ self.clusters_ = centroids.to_numpy()[:, 0:-1].astype(float)
self.classes_ = self._array_to_int(centroids.to_numpy()[:, -1])
self.p_ = self.parameters["p"]
From 9d94a502ecd9fe66bc254e1efd691341793c1816 Mon Sep 17 00:00:00 2001
From: Badr
Date: Sun, 29 Oct 2023 08:41:45 -0400
Subject: [PATCH 3/4] Update cluster.py
---
verticapy/machine_learning/vertica/cluster.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/verticapy/machine_learning/vertica/cluster.py b/verticapy/machine_learning/vertica/cluster.py
index 5cafd6c07..8871f7fea 100755
--- a/verticapy/machine_learning/vertica/cluster.py
+++ b/verticapy/machine_learning/vertica/cluster.py
@@ -615,7 +615,7 @@ def _compute_attributes(self) -> None:
Computes the model's attributes.
"""
centers = self.get_vertica_attributes("centers")
- self.clusters_ = centers.to_numpy().astype(float)
+ self.clusters_ = centers.to_numpy()
self.p_ = 2
self._compute_metrics()
@@ -1128,7 +1128,7 @@ def _compute_attributes(self) -> None:
Computes the model's attributes.
"""
centers = self.get_vertica_attributes("centers")
- self.clusters_ = centers.to_numpy().astype(float)
+ self.clusters_ = centers.to_numpy()
self.p_ = 2
self.gamma_ = self.parameters["gamma"]
dtypes = centers.dtype
From 3990c73f243b61279edb52ad4cd350bdc9d9fd6c Mon Sep 17 00:00:00 2001
From: Badr
Date: Sun, 29 Oct 2023 12:21:50 -0400
Subject: [PATCH 4/4] Update cluster.py
---
verticapy/machine_learning/memmodel/cluster.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/verticapy/machine_learning/memmodel/cluster.py b/verticapy/machine_learning/memmodel/cluster.py
index 68f59aa8f..c96098d69 100755
--- a/verticapy/machine_learning/memmodel/cluster.py
+++ b/verticapy/machine_learning/memmodel/cluster.py
@@ -1100,7 +1100,7 @@ def __init__(
is_categorical: Optional[ArrayLike] = None,
) -> None:
is_categorical = format_type(is_categorical, dtype=list)
- self.clusters_ = np.array(clusters).astype(float)
+ self.clusters_ = np.array(clusters)
self.p_ = p
self.gamma_ = gamma
self.is_categorical_ = np.array(is_categorical)