diff --git a/verticapy/machine_learning/memmodel/cluster.py b/verticapy/machine_learning/memmodel/cluster.py
index 3743fa572..98179be2c 100755
--- a/verticapy/machine_learning/memmodel/cluster.py
+++ b/verticapy/machine_learning/memmodel/cluster.py
@@ -28,7 +28,17 @@
class Clustering(InMemoryModel):
"""
- InMemoryModel implementation of clustering algorithms.
+ :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel`
+ implementation of clustering algorithms.
+
+ .. note::
+
+ This is the base class for all in-memory implementations of
+ clustering algorithms, viz.
+ :py:mod:`verticapy.machine_learning.memmodel.cluster.KMeans`,
+ :py:mod:`verticapy.machine_learning.memmodel.cluster.NearestCentroid`,
+ :py:mod:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans`
+ and :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes`.
Parameters
----------
@@ -234,7 +244,7 @@ def transform_sql(self, X: ArrayLike) -> list[str]:
class KMeans(Clustering):
"""
- InMemoryModel implementation of KMeans.
+ :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of KMeans.
Parameters
----------
@@ -242,6 +252,123 @@ class KMeans(Clustering):
List of the model's cluster centers.
p: int, optional
The p corresponding to one of the p-distances.
+
+ .. note::
+
+ :py:mod:`verticapy.machine_learning.memmodel` are
+ defined entirely by their attributes. For example,
+ 'cluster centroids' and 'p value' define a KMeans model.
+
+ Examples
+ --------
+
+ **Initialization**
+
+ Import the required module.
+
+ .. ipython:: python
+ :suppress:
+
+ from verticapy.machine_learning.memmodel.cluster import KMeans
+
+ A KMeans model is defined by its cluster centroids and the p value.
+ In this example, we will use the following:
+
+ .. ipython:: python
+ :suppress:
+
+ clusters = [[0.5, 0.6], [1, 2], [100, 200]]
+ p = 2
+
+ Let's create a
+ :py:mod:`verticapy.machine_learning.memmodel.cluster.KMeans` model.
+
+ .. ipython:: python
+ :suppress:
+
+ model_km = KMeans(clusters, p)
+
+ Create a dataset.
+
+ .. ipython:: python
+ :suppress:
+
+ data = [[2, 3]]
+
+ **Making In-Memory Predictions**
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict`
+ method to do predictions
+
+ .. ipython:: python
+ :suppress:
+
+ model_km.predict(data)[0]
+
+ .. note::
+
+ :py:mod:`verticapy.machine_learning.memmodel.cluster.KMeans`
+ assigns a cluster id to identify each cluster.
+ In this example, cluster with centroid [0.5, 0.6] will have id = 0,
+ with centroid [1,2] will have id = 1 and so on.
+ :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict`
+ method returns the id of the predicted cluster.
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict_proba`
+ method to compute the predicted probabilities for each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_km.predict_proba(data)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.transform`
+ method to compute the distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_km.transform(data)
+
+ **Deploy SQL Code**
+
+ Let's use the following column names:
+
+ .. ipython:: python
+ :suppress:
+
+ cnames = ['col1', 'col2']
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict_sql`
+ method to get the SQL code needed to deploy the model using its attributes
+
+ .. ipython:: python
+ :suppress:
+
+ model_km.predict_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict_proba_sql`
+ method to get the SQL code needed to deploy the model that
+ computes predicted probabilities
+
+ .. ipython:: python
+ :suppress:
+
+ model_km.predict_proba_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.transform_sql`
+ method to get the SQL code needed to deploy the model that
+ computes distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_km.transform_sql(cnames)
+
+ .. hint::
+
+ This object can be pickled and used in any in-memory
+ environment, just like `SKLEARN <https://scikit-learn.org/>`_ models.
"""
# Properties.
@@ -259,8 +386,8 @@ def __init__(self, clusters: ArrayLike, p: int = 2) -> None:
class NearestCentroid(Clustering):
"""
- InMemoryModel implementation of NearestCentroid
- algorithm.
+ :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel`
+ implementation of NearestCentroid algorithm.
Parameters
----------
@@ -270,7 +397,108 @@ class NearestCentroid(Clustering):
Names of the classes.
p: int, optional
The p corresponding to one of the p-distances.
- """
+
+ Examples
+ --------
+
+ **Initialization**
+
+ Import the required module.
+
+ .. ipython:: python
+ :suppress:
+
+ from verticapy.machine_learning.memmodel.cluster import NearestCentroid
+
+ A NearestCentroid model is defined by its cluster centroids,
+ classes and the p value. In this example, we will use the following:
+
+ .. ipython:: python
+ :suppress:
+
+ clusters = [[0.5, 0.6], [1, 2], [100, 200]]
+ p = 2
+ classes = ['class_a', 'class_b', 'class_c']
+
+
+ Let's create a
+ :py:mod:`verticapy.machine_learning.memmodel.cluster.NearestCentroid` model.
+
+ .. ipython:: python
+ :suppress:
+
+ model_nc = NearestCentroid(clusters, classes, p)
+
+ Create a dataset.
+
+ .. ipython:: python
+ :suppress:
+
+ data = [[2, 3]]
+
+ **Making In-Memory Predictions**
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict`
+ method to do predictions
+
+ .. ipython:: python
+ :suppress:
+
+ model_nc.predict(data)[0]
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict_proba`
+ method to compute the predicted probabilities for each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_nc.predict_proba(data)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.transform`
+ method to compute the distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_nc.transform(data)
+
+ **Deploy SQL Code**
+
+ Let's use the following column names:
+
+ .. ipython:: python
+ :suppress:
+
+ cnames = ['col1', 'col2']
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict_sql`
+ method to get the SQL code needed to deploy the model using its attributes
+
+ .. ipython:: python
+ :suppress:
+
+ model_nc.predict_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict_proba_sql`
+ method to get the SQL code needed to deploy the model that computes predicted probabilities
+
+ .. ipython:: python
+ :suppress:
+
+ model_nc.predict_proba_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.transform_sql`
+ method to get the SQL code needed to deploy the model that computes distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_nc.transform_sql(cnames)
+
+ .. hint::
+
+ This object can be pickled and used in any in-memory
+ environment, just like `SKLEARN <https://scikit-learn.org/>`_ models. """
# Properties.
@@ -297,7 +525,8 @@ def __init__(
class BisectingKMeans(Clustering, Tree):
"""
- InMemoryModel implementation of BisectingKMeans.
+ :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel`
+ implementation of BisectingKMeans.
Parameters
----------
@@ -320,7 +549,134 @@ class BisectingKMeans(Clustering, Tree):
squares.
p: int, optional
The p corresponding to one of the p-distances.
- """
+
+ Examples
+ --------
+
+ **Initialization**
+
+ Import the required module.
+
+ .. ipython:: python
+ :suppress:
+
+ from verticapy.machine_learning.memmodel.cluster import BisectingKMeans
+
+ A BisectingKMeans model is defined by its cluster centroids and
+ the left and right child node IDs of each node. In this example,
+ we will use the following:
+
+ .. ipython:: python
+ :suppress:
+
+ clusters = [[0.5, 0.6], [1, 2], [100, 200], [10, 700], [-100, -200]]
+ children_left = [1, 3, None, None, None]
+ children_right = [2, 4, None, None, None]
+
+
+ Let's create a :py:mod:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans` model.
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm = BisectingKMeans(clusters, children_left, children_right)
+
+ Create a dataset.
+
+ .. ipython:: python
+ :suppress:
+
+ data = [[2, 3]]
+
+ **Making In-Memory Predictions**
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict`
+ method to do predictions
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm.predict(data)[0]
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict_proba`
+ method to compute the predicted probabilities for each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm.predict_proba(data)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.transform`
+ method to compute the distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm.transform(data)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.to_graphviz`
+ method to generate code for a `Graphviz <https://graphviz.org/>`_ tree
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm.to_graphviz()
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.plot_tree`
+ method to draw the input tree.
+
+ .. code-block:: python
+
+ model_bkm.plot_tree()
+
+ .. ipython:: python
+ :suppress:
+
+ res = model_bkm.plot_tree()
+ res.render(filename='figures/machine_learning_cluster_bisecting_kmeans', format='png')
+
+ .. image:: /../figures/machine_learning_cluster_bisecting_kmeans.png
+
+ .. note:: :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.plot_tree`
+ requires the `Graphviz <https://graphviz.org/>`_ module.
+
+ **Deploy SQL Code**
+
+ Let's use the following column names:
+
+ .. ipython:: python
+ :suppress:
+
+ cnames = ['col1', 'col2']
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict_sql`
+ method to get the SQL code needed to deploy the model using its attributes
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm.predict_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict_proba_sql`
+ method to get the SQL code needed to deploy the model that computes predicted probabilities
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm.predict_proba_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.transform_sql`
+ method to get the SQL code needed to deploy the model that computes distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_bkm.transform_sql(cnames)
+
+ .. hint::
+
+ This object can be pickled and used in any in-memory
+ environment, just like `SKLEARN <https://scikit-learn.org/>`_ models. """
# Properties.
@@ -589,7 +945,8 @@ def to_graphviz(
class KPrototypes(Clustering):
"""
- InMemoryModel implementation of KPrototypes.
+ :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel`
+ implementation of KPrototypes.
Parameters
----------
@@ -606,6 +963,122 @@ class KPrototypes(Clustering):
is a categorical variable, where True indicates
categorical and False numerical. If empty, all
the variables are considered categorical.
+
+ .. note:: :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes`
+ algorithm allows you to use categorical variables directly without the need to encode them.
+
+ Examples
+ --------
+
+ **Initialization**
+
+ Import the required module.
+
+ .. ipython:: python
+ :suppress:
+
+ from verticapy.machine_learning.memmodel.cluster import KPrototypes
+
+ A KPrototypes model is defined by its cluster centroids. Optionally
+ you can also provide p value, gamma and provide information about
+ categorical variables. In this example, we will use the following:
+
+ .. ipython:: python
+ :suppress:
+
+ clusters = [[0.5, 'high'], [1, 'low'], [100, 'high']]
+ p = 2
+ gamma = 1.0
+ is_categorical = [0, 1]
+
+ Let's create a :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes`
+ model.
+
+ .. ipython:: python
+ :suppress:
+
+ model_kp = KPrototypes(clusters, p, gamma, is_categorical)
+
+ Create a dataset.
+
+ .. ipython:: python
+ :suppress:
+
+ data = [[2, 'low']]
+
+ **Making In-Memory Predictions**
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict`
+ method to do predictions
+
+ .. ipython:: python
+ :suppress:
+
+ model_kp.predict(data)[0]
+
+ .. note::
+
+ :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes`
+ assigns a cluster id to identify each cluster.
+ In this example, cluster with centroid [0.5, 'high'] will have
+ id = 0, with centroid [1,'low'] will have id = 1 and so on.
+ :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict`
+ method returns the id of the predicted cluster.
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict_proba`
+ method to compute the predicted probabilities for each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_kp.predict_proba(data)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.transform`
+ method to compute the distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_kp.transform(data)
+
+ **Deploy SQL Code**
+
+ Let's use the following column names:
+
+ .. ipython:: python
+ :suppress:
+
+ cnames = ['col1', 'col2']
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict_sql`
+ method to get the SQL code needed to deploy the model using its attributes
+
+ .. ipython:: python
+ :suppress:
+
+ model_kp.predict_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict_proba_sql`
+ method to get the SQL code needed to deploy the model that computes predicted probabilities
+
+ .. ipython:: python
+ :suppress:
+
+ model_kp.predict_proba_sql(cnames)
+
+ Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.transform_sql`
+ method to get the SQL code needed to deploy the model that computes distance from each cluster
+
+ .. ipython:: python
+ :suppress:
+
+ model_kp.transform_sql(cnames)
+
+ .. hint::
+
+ This object can be pickled and used in any in-memory
+ environment, just like `SKLEARN <https://scikit-learn.org/>`_ models.
+
"""
# Properties.