Docstrings API examples (#648)

* Example for `FormulaicTransformer`
* Example for `IdentityTransformer`
* Example for `PandasTypeSelector`
* Example for `InformationFilter`
* Example for `RepeatingBasisFunction`
* Fix `Examples` keyword in docstring

---------

Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com>
koaning · Apr 10, 2024 · 35dd279 · 35dd279
1 parent 5e53190
commit 35dd279
Show file tree

Hide file tree

Showing 7 changed files with 117 additions and 4 deletions.
diff --git a/sklego/preprocessing/dictmapper.py b/sklego/preprocessing/dictmapper.py
@@ -24,8 +24,8 @@ class DictMapper(TransformerMixin, BaseEstimator):
     dim_ : int
         Deprecated, please use `n_features_in_` instead.
 
-    Example
-    -------
+    Examples
+    --------
     ```py
     import pandas as pd
     from sklego.preprocessing.dictmapper import DictMapper

diff --git a/sklego/preprocessing/formulaictransformer.py b/sklego/preprocessing/formulaictransformer.py
@@ -34,6 +34,33 @@ class FormulaicTransformer(TransformerMixin, BaseEstimator):
         The parsed model specification.
     n_features_in_ : int
         Number of features seen during `fit`.
+
+    Examples
+    --------
+    ```py
+    import formulaic
+    import pandas as pd
+    import numpy as np
+    from sklego.preprocessing import FormulaicTransformer
+
+    df = pd.DataFrame({
+        'a': ['A', 'B', 'C'],
+        'b': [0.3, 0.1, 0.2],
+    })
+
+    # By default the transformed matrix is returned as a numpy array
+    FormulaicTransformer("a + b + a:b").fit_transform(df)
+    # array([[1. , 0. , 0. , 0.3, 0. , 0. ],
+    #        [1. , 1. , 0. , 0.1, 0.1, 0. ],
+    #        [1. , 0. , 1. , 0.2, 0. , 0.2]])
+
+    # Pass "pandas" as `return_type` to get a pandas DataFrame instead
+    FormulaicTransformer("a + b + a:b", "pandas").fit_transform(df)
+    #	Intercept	a[T.B]	a[T.C]	b	    a[T.B]:b	a[T.C]:b
+    #0	1.0	        0	    0	    0.3	    0.0	        0.0
+    #1	1.0	        1	    0	    0.1	    0.1	        0.0
+    #2	1.0	        0	    1	    0.2	    0.0	        0.2
+    ```
     """
 
     def __init__(self, formula, return_type="numpy"):

diff --git a/sklego/preprocessing/identitytransformer.py b/sklego/preprocessing/identitytransformer.py
@@ -22,6 +22,31 @@ class IdentityTransformer(BaseEstimator, TransformerMixin):
         The number of features seen during `fit`.
     shape_ : tuple[int, int]
         Deprecated, please use `n_samples_` and `n_features_in_` instead.
+
+    Examples
+    --------
+    ```py
+    import pandas as pd
+    from sklego.preprocessing import IdentityTransformer
+
+    df = pd.DataFrame({
+        "name": ["Swen", "Victor", "Alex"],
+        "length": [1.82, 1.85, 1.80],
+        "shoesize": [42, 44, 45]
+    })
+
+    IdentityTransformer().fit_transform(df)
+    #	name	length	shoesize
+    # 0	Swen	1.82	42
+    # 1	Victor	1.85	44
+    # 2	Alex	1.80	45
+
+    # With check_X=True, `X` is validated to be a non-empty 2D array of finite values and is cast to float where possible
+    IdentityTransformer(check_X=True).fit_transform(df.drop(columns="name"))
+    # array([[ 1.82, 42.  ],
+    #        [ 1.85, 44.  ],
+    #        [ 1.8 , 45.  ]])
+    ```
     """
 
     def __init__(self, check_X: bool = False):

diff --git a/sklego/preprocessing/outlier_remover.py b/sklego/preprocessing/outlier_remover.py
@@ -21,8 +21,8 @@ class OutlierRemover(TrainOnlyTransformerMixin, BaseEstimator):
     estimator_ : object
         The fitted outlier detector.
 
-    Example
-    -------
+    Examples
+    --------
     ```py
     import numpy as np
 

diff --git a/sklego/preprocessing/pandastransformers.py b/sklego/preprocessing/pandastransformers.py
@@ -183,6 +183,33 @@ class PandasTypeSelector(BaseEstimator, TransformerMixin):
     !!! warning
 
         Raises a `TypeError` if input provided is not a DataFrame.
+
+    Examples
+    --------
+    ```py
+    import pandas as pd
+    from sklego.preprocessing import PandasTypeSelector
+
+    df = pd.DataFrame({
+        "name": ["Swen", "Victor", "Alex"],
+        "length": [1.82, 1.85, 1.80],
+        "shoesize": [42, 44, 45]
+    })
+
+    # Exclude all columns of a single dtype
+    PandasTypeSelector(exclude="int64").fit_transform(df)
+    #	name	length
+    #0	Swen	1.82
+    #1	Victor	1.85
+    #2	Alex	1.80
+
+    # Include the columns matching any of several dtypes
+    PandasTypeSelector(include=["int64", "object"]).fit_transform(df)
+    #	name	shoesize
+    #0	Swen	42
+    #1	Victor	44
+    #2	Alex	45
+    ```
     """
 
     def __init__(self, include=None, exclude=None):

diff --git a/sklego/preprocessing/projections.py b/sklego/preprocessing/projections.py
@@ -155,6 +155,24 @@ class InformationFilter(BaseEstimator, TransformerMixin):
         The projection matrix that can be used to filter information out of a dataset.
     col_ids_ : List[int] of length `len(columns)`
         The list of column ids of the sensitive columns.
+
+    Examples
+    --------
+    ```py
+    import pandas as pd
+    from sklego.preprocessing import InformationFilter
+
+    df = pd.DataFrame({
+        "user_id": [101, 102, 103],
+        "length": [1.82, 1.85, 1.80],
+        "age": [21, 37, 45]
+    })
+
+    InformationFilter(columns=["length", "age"], alpha=0.5).fit_transform(df)
+    # array([[50.10152483,  3.87905643],
+    #        [50.26253897, 19.59684308],
+    #        [52.66084873, 28.06719867]])
+    ```
     """
 
     def __init__(self, columns, alpha=1):

diff --git a/sklego/preprocessing/repeatingbasis.py b/sklego/preprocessing/repeatingbasis.py
@@ -41,6 +41,22 @@ class RepeatingBasisFunction(TransformerMixin, BaseEstimator):
     ----------
     pipeline_ : ColumnTransformer
         Fitted `ColumnTransformer` object used to transform data with repeating basis functions.
+
+    Examples
+    --------
+    ```py
+    import pandas as pd
+    from sklego.preprocessing import RepeatingBasisFunction
+
+    df = pd.DataFrame({
+        "user_id": [101, 102, 103],
+        "created_day": [5, 1, 7]
+    })
+    RepeatingBasisFunction(column="created_day", input_range=(1, 7), n_periods=5).fit_transform(df)
+    # array([[0.06217652, 0.00432024, 0.16901332, 0.89483932, 0.64118039],
+    #        [1.        , 0.36787944, 0.01831564, 0.01831564, 0.36787944],
+    #        [1.        , 0.36787944, 0.01831564, 0.01831564, 0.36787944]])
+    ```
     """
 
     def __init__(self, column=0, remainder="drop", n_periods=12, input_range=None, width=1.0):