diff --git a/verticapy/machine_learning/memmodel/cluster.py b/verticapy/machine_learning/memmodel/cluster.py index 3743fa572..98179be2c 100755 --- a/verticapy/machine_learning/memmodel/cluster.py +++ b/verticapy/machine_learning/memmodel/cluster.py @@ -28,7 +28,17 @@ class Clustering(InMemoryModel): """ - InMemoryModel implementation of clustering algorithms. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of clustering algorithms. + + .. note:: + + This is the base class for all in-memory implementations of + clustering algorithms, viz. + :py:mod:`verticapy.machine_learning.memmodel.cluster.KMeans`, + :py:mod:`verticapy.machine_learning.memmodel.cluster.NearestCentroid`, + :py:mod:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans` + and :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes`. Parameters ---------- @@ -234,7 +244,7 @@ def transform_sql(self, X: ArrayLike) -> list[str]: class KMeans(Clustering): """ - InMemoryModel implementation of KMeans. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` implementation of KMeans. Parameters ---------- @@ -242,6 +252,123 @@ class KMeans(Clustering): List of the model's cluster centers. p: int, optional The p corresponding to one of the p-distances. + + .. note:: + + Models in :py:mod:`verticapy.machine_learning.memmodel` are + defined entirely by their attributes. For example, + 'cluster centroids' and 'p value' define a KMeans model. + + Examples + -------- + + **Initialization** + + Import the required module. + + .. ipython:: python + :suppress: + + from verticapy.machine_learning.memmodel.cluster import KMeans + + A KMeans model is defined by its cluster centroids and the p value. + In this example, we will use the following: + + .. ipython:: python + :suppress: + + clusters = [[0.5, 0.6], [1, 2], [100, 200]] + p = 2 + + Let's create a + :py:mod:`verticapy.machine_learning.memmodel.cluster.KMeans` model. + + .. 
ipython:: python + :suppress: + + model_km = KMeans(clusters, p) + + Create a dataset. + + .. ipython:: python + :suppress: + + data = [[2, 3]] + + **Making In-Memory Predictions** + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict` + method to do predictions + + .. ipython:: python + :suppress: + + model_km.predict(data)[0] + + .. note:: + + :py:mod:`verticapy.machine_learning.memmodel.cluster.KMeans` + assigns a cluster id to identify each cluster. + In this example, cluster with centroid [0.5, 0.6] will have id = 0, + with centroid [1,2] will have id = 1 and so on. + :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict` + method returns the id of the predicted cluster. + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict_proba` + method to compute the predicted probabilities for each cluster + + .. ipython:: python + :suppress: + + model_km.predict_proba(data) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.transform` + method to compute the distance from each cluster + + .. ipython:: python + :suppress: + + model_km.transform(data) + + **Deploy SQL Code** + + Let's use the following column names: + + .. ipython:: python + :suppress: + + cnames = ['col1', 'col2'] + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict_sql` + method to get the SQL code needed to deploy the model using its attributes + + .. ipython:: python + :suppress: + + model_km.predict_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.predict_proba_sql` + method to get the SQL code needed to deploy the model that + computes predicted probabilities + + .. ipython:: python + :suppress: + + model_km.predict_proba_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KMeans.transform_sql` + method to get the SQL code needed to deploy the model that + computes distance from each cluster + + .. 
ipython:: python + :suppress: + + model_km.transform_sql(cnames) + + .. hint:: + + This object can be pickled and used in any in-memory + environment, just like `SKLEARN <https://scikit-learn.org/>`_ models. """ # Properties. @@ -259,8 +386,8 @@ def __init__(self, clusters: ArrayLike, p: int = 2) -> None: class NearestCentroid(Clustering): """ - InMemoryModel implementation of NearestCentroid - algorithm. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of NearestCentroid algorithm. Parameters ---------- @@ -270,7 +397,108 @@ class NearestCentroid(Clustering): Names of the classes. p: int, optional The p corresponding to one of the p-distances. - """ + + Examples + -------- + + **Initialization** + + Import the required module. + + .. ipython:: python + :suppress: + + from verticapy.machine_learning.memmodel.cluster import NearestCentroid + + A NearestCentroid model is defined by its cluster centroids, + classes and the p value. In this example, we will use the following: + + .. ipython:: python + :suppress: + + clusters = [[0.5, 0.6], [1, 2], [100, 200]] + p = 2 + classes = ['class_a', 'class_b', 'class_c'] + + + Let's create a + :py:mod:`verticapy.machine_learning.memmodel.cluster.NearestCentroid` model. + + .. ipython:: python + :suppress: + + model_nc = NearestCentroid(clusters, classes, p) + + Create a dataset. + + .. ipython:: python + :suppress: + + data = [[2, 3]] + + **Making In-Memory Predictions** + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict` + method to do predictions + + .. ipython:: python + :suppress: + + model_nc.predict(data)[0] + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict_proba` + method to compute the predicted probabilities for each cluster + + .. ipython:: python + :suppress: + + model_nc.predict_proba(data) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.transform` + method to compute the distance from each cluster + + .. 
ipython:: python + :suppress: + + model_nc.transform(data) + + **Deploy SQL Code** + + Let's use the following column names: + + .. ipython:: python + :suppress: + + cnames = ['col1', 'col2'] + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict_sql` + method to get the SQL code needed to deploy the model using its attributes + + .. ipython:: python + :suppress: + + model_nc.predict_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.predict_proba_sql` + method to get the SQL code needed to deploy the model that computes predicted probabilities + + .. ipython:: python + :suppress: + + model_nc.predict_proba_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.NearestCentroid.transform_sql` + method to get the SQL code needed to deploy the model that computes distance from each cluster + + .. ipython:: python + :suppress: + + model_nc.transform_sql(cnames) + + .. hint:: + + This object can be pickled and used in any in-memory + environment, just like `SKLEARN <https://scikit-learn.org/>`_ models. """ # Properties. @@ -297,7 +525,8 @@ def __init__( class BisectingKMeans(Clustering, Tree): """ - InMemoryModel implementation of BisectingKMeans. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of BisectingKMeans. Parameters ---------- @@ -320,7 +549,134 @@ class BisectingKMeans(Clustering, Tree): squares. p: int, optional The p corresponding to one of the p-distances. - """ + + Examples + -------- + + **Initialization** + + Import the required module. + + .. ipython:: python + :suppress: + + from verticapy.machine_learning.memmodel.cluster import BisectingKMeans + + A BisectingKMeans model is defined by its cluster centroids and + the left and right child node ids of each node. In this example, + we will use the following: + + .. 
ipython:: python + :suppress: + + clusters = [[0.5, 0.6], [1, 2], [100, 200], [10, 700], [-100, -200]] + children_left = [1, 3, None, None, None] + children_right = [2, 4, None, None, None] + + + Let's create a :py:mod:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans` model. + + .. ipython:: python + :suppress: + + model_bkm = BisectingKMeans(clusters, children_left, children_right) + + Create a dataset. + + .. ipython:: python + :suppress: + + data = [[2, 3]] + + **Making In-Memory Predictions** + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict` + method to do predictions + + .. ipython:: python + :suppress: + + model_bkm.predict(data)[0] + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict_proba` + method to compute the predicted probabilities for each cluster + + .. ipython:: python + :suppress: + + model_bkm.predict_proba(data) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.transform` + method to compute the distance from each cluster + + .. ipython:: python + :suppress: + + model_bkm.transform(data) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.to_graphviz` + method to generate code for a `Graphviz <https://graphviz.org/>`_ tree + + .. ipython:: python + :suppress: + + model_bkm.to_graphviz() + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.plot_tree` + method to draw the input tree. + + .. code-block:: python + + model_bkm.plot_tree() + + .. ipython:: python + :suppress: + + res = model_bkm.plot_tree() + res.render(filename='figures/machine_learning_cluster_bisecting_kmeans', format='png') + + .. image:: /../figures/machine_learning_cluster_bisecting_kmeans.png + + .. note:: :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.plot_tree` + requires the `Graphviz <https://graphviz.org/>`_ module + + **Deploy SQL Code** + + Let's use the following column names: + + .. 
ipython:: python + :suppress: + + cnames = ['col1', 'col2'] + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict_sql` + method to get the SQL code needed to deploy the model using its attributes + + .. ipython:: python + :suppress: + + model_bkm.predict_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.predict_proba_sql` + method to get the SQL code needed to deploy the model that computes predicted probabilities + + .. ipython:: python + :suppress: + + model_bkm.predict_proba_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.BisectingKMeans.transform_sql` + method to get the SQL code needed to deploy the model that computes distance from each cluster + + .. ipython:: python + :suppress: + + model_bkm.transform_sql(cnames) + + .. hint:: + + This object can be pickled and used in any in-memory + environment, just like `SKLEARN <https://scikit-learn.org/>`_ models. """ # Properties. @@ -589,7 +945,8 @@ def to_graphviz( class KPrototypes(Clustering): """ - InMemoryModel implementation of KPrototypes. + :py:mod:`verticapy.machine_learning.memmodel.base.InMemoryModel` + implementation of KPrototypes. Parameters ---------- @@ -606,6 +963,122 @@ class KPrototypes(Clustering): is a categorical variable, where True indicates categorical and False numerical. If empty, all the variables are considered categorical. + + .. note:: The :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes` + algorithm allows you to use categorical variables directly without the need to encode them. + + Examples + -------- + + **Initialization** + + Import the required module. + + .. ipython:: python + :suppress: + + from verticapy.machine_learning.memmodel.cluster import KPrototypes + + A KPrototypes model is defined by its cluster centroids. Optionally, + you can also provide the p value, gamma, and information about + which variables are categorical. In this example, we will use the following: + + .. 
ipython:: python + :suppress: + + clusters = [[0.5, 'high'], [1, 'low'], [100, 'high']] + p = 2 + gamma = 1.0 + is_categorical = [0, 1] + + Let's create a :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes` + model. + + .. ipython:: python + :suppress: + + model_kp = KPrototypes(clusters, p, gamma, is_categorical) + + Create a dataset. + + .. ipython:: python + :suppress: + + data = [[2, 'low']] + + **Making In-Memory Predictions** + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict` + method to do predictions + + .. ipython:: python + :suppress: + + model_kp.predict(data)[0] + + .. note:: + + :py:mod:`verticapy.machine_learning.memmodel.cluster.KPrototypes` + assigns a cluster id to identify each cluster. + In this example, cluster with centroid [0.5, 'high'] will have + id = 0, with centroid [1,'low'] will have id = 1 and so on. + :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict` + method returns the id of the predicted cluster. + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict_proba` + method to compute the predicted probabilities for each cluster + + .. ipython:: python + :suppress: + + model_kp.predict_proba(data) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.transform` + method to compute the distance from each cluster + + .. ipython:: python + :suppress: + + model_kp.transform(data) + + **Deploy SQL Code** + + Let's use the following column names: + + .. ipython:: python + :suppress: + + cnames = ['col1', 'col2'] + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict_sql` + method to get the SQL code needed to deploy the model using its attributes + + .. 
ipython:: python + :suppress: + + model_kp.predict_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.predict_proba_sql` + method to get the SQL code needed to deploy the model that computes predicted probabilities + + .. ipython:: python + :suppress: + + model_kp.predict_proba_sql(cnames) + + Use :py:meth:`verticapy.machine_learning.memmodel.cluster.KPrototypes.transform_sql` + method to get the SQL code needed to deploy the model that computes distance from each cluster + + .. ipython:: python + :suppress: + + model_kp.transform_sql(cnames) + + .. hint:: + + This object can be pickled and used in any in-memory + environment, just like `SKLEARN <https://scikit-learn.org/>`_ models. + """ # Properties.