From 35dd2792193a9b27dbc08fdba48e150ed86c6348 Mon Sep 17 00:00:00 2001 From: Magdalena Anopsy <74981211+anopsy@users.noreply.github.com> Date: Wed, 10 Apr 2024 21:04:26 +0200 Subject: [PATCH] Docstrings API examples (#648) * Example for `FormulaicTransformer` * Example for `IdentityTransformer` * Example for `PandasTypeSelector` * Example for `InformationFilter` * Example for `RepeatingBasisFunction` * Fix `Examples` keyword in docstring --------- Co-authored-by: Francesco Bruzzesi <42817048+FBruzzesi@users.noreply.github.com> --- sklego/preprocessing/dictmapper.py | 4 +-- sklego/preprocessing/formulaictransformer.py | 27 ++++++++++++++++++++ sklego/preprocessing/identitytransformer.py | 25 ++++++++++++++++++ sklego/preprocessing/outlier_remover.py | 4 +-- sklego/preprocessing/pandastransformers.py | 27 ++++++++++++++++++++ sklego/preprocessing/projections.py | 18 +++++++++++++ sklego/preprocessing/repeatingbasis.py | 16 ++++++++++++ 7 files changed, 117 insertions(+), 4 deletions(-) diff --git a/sklego/preprocessing/dictmapper.py b/sklego/preprocessing/dictmapper.py index d6373dcb2..39f32bb55 100644 --- a/sklego/preprocessing/dictmapper.py +++ b/sklego/preprocessing/dictmapper.py @@ -24,8 +24,8 @@ class DictMapper(TransformerMixin, BaseEstimator): dim_ : int Deprecated, please use `n_features_in_` instead. - Example - ------- + Examples + -------- ```py import pandas as pd from sklego.preprocessing.dictmapper import DictMapper diff --git a/sklego/preprocessing/formulaictransformer.py b/sklego/preprocessing/formulaictransformer.py index 60109e249..2160c5001 100644 --- a/sklego/preprocessing/formulaictransformer.py +++ b/sklego/preprocessing/formulaictransformer.py @@ -34,6 +34,33 @@ class FormulaicTransformer(TransformerMixin, BaseEstimator): The parsed model specification. n_features_in_ : int Number of features seen during `fit`. 
+ + Examples + -------- + ```py + import formulaic + import pandas as pd + import numpy as np + from sklego.preprocessing import FormulaicTransformer + + df = pd.DataFrame({ + 'a': ['A', 'B', 'C'], + 'b': [0.3, 0.1, 0.2], + }) + + # Default return type: numpy array + FormulaicTransformer("a + b + a:b").fit_transform(df) + # array([[1. , 0. , 0. , 0.3, 0. , 0. ], + # [1. , 1. , 0. , 0.1, 0.1, 0. ], + # [1. , 0. , 1. , 0.2, 0. , 0.2]]) + + # Return a pandas DataFrame instead + FormulaicTransformer("a + b + a:b", "pandas").fit_transform(df) + # Intercept a[T.B] a[T.C] b a[T.B]:b a[T.C]:b + #0 1.0 0 0 0.3 0.0 0.0 + #1 1.0 1 0 0.1 0.1 0.0 + #2 1.0 0 1 0.2 0.0 0.2 + ``` """ def __init__(self, formula, return_type="numpy"): diff --git a/sklego/preprocessing/identitytransformer.py b/sklego/preprocessing/identitytransformer.py index 28d213a19..bf291f00e 100644 --- a/sklego/preprocessing/identitytransformer.py +++ b/sklego/preprocessing/identitytransformer.py @@ -22,6 +22,31 @@ class IdentityTransformer(BaseEstimator, TransformerMixin): The number of features seen during `fit`. shape_ : tuple[int, int] Deprecated, please use `n_samples_` and `n_features_in_` instead. + + Examples + -------- + ```py + import pandas as pd + from sklego.preprocessing import IdentityTransformer + + df = pd.DataFrame({ + "name": ["Swen", "Victor", "Alex"], + "length": [1.82, 1.85, 1.80], + "shoesize": [42, 44, 45] + }) + + IdentityTransformer().fit_transform(df) + # name length shoesize + # 0 Swen 1.82 42 + # 1 Victor 1.85 44 + # 2 Alex 1.80 45 + + # With check_X=True, `X` is validated to be a non-empty 2D array of finite values and is cast to float where possible + IdentityTransformer(check_X=True).fit_transform(df.drop(columns="name")) + # array([[ 1.82, 42. ], + # [ 1.85, 44. ], + # [ 1.8 , 45. 
]]) + ``` """ def __init__(self, check_X: bool = False): diff --git a/sklego/preprocessing/outlier_remover.py b/sklego/preprocessing/outlier_remover.py index a539d4fc7..5bc21a9e5 100644 --- a/sklego/preprocessing/outlier_remover.py +++ b/sklego/preprocessing/outlier_remover.py @@ -21,8 +21,8 @@ class OutlierRemover(TrainOnlyTransformerMixin, BaseEstimator): estimator_ : object The fitted outlier detector. - Example - ------- + Examples + -------- ```py import numpy as np diff --git a/sklego/preprocessing/pandastransformers.py b/sklego/preprocessing/pandastransformers.py index 3b51c85b2..92160df8e 100644 --- a/sklego/preprocessing/pandastransformers.py +++ b/sklego/preprocessing/pandastransformers.py @@ -183,6 +183,33 @@ class PandasTypeSelector(BaseEstimator, TransformerMixin): !!! warning Raises a `TypeError` if input provided is not a DataFrame. + + Examples + -------- + ```py + import pandas as pd + from sklego.preprocessing import PandasTypeSelector + + df = pd.DataFrame({ + "name": ["Swen", "Victor", "Alex"], + "length": [1.82, 1.85, 1.80], + "shoesize": [42, 44, 45] + }) + + # Excluding a single dtype + PandasTypeSelector(exclude="int64").fit_transform(df) + # name length + #0 Swen 1.82 + #1 Victor 1.85 + #2 Alex 1.80 + + # Including multiple dtypes + PandasTypeSelector(include=["int64", "object"]).fit_transform(df) + # name shoesize + #0 Swen 42 + #1 Victor 44 + #2 Alex 45 + ``` """ def __init__(self, include=None, exclude=None): diff --git a/sklego/preprocessing/projections.py b/sklego/preprocessing/projections.py index dc0689520..86430f05c 100644 --- a/sklego/preprocessing/projections.py +++ b/sklego/preprocessing/projections.py @@ -155,6 +155,24 @@ class InformationFilter(BaseEstimator, TransformerMixin): The projection matrix that can be used to filter information out of a dataset. col_ids_ : List[int] of length `len(columns)` The list of column ids of the sensitive columns. 
+ + Examples + -------- + ```py + import pandas as pd + from sklego.preprocessing import InformationFilter + + df = pd.DataFrame({ + "user_id": [101, 102, 103], + "length": [1.82, 1.85, 1.80], + "age": [21, 37, 45] + }) + + InformationFilter(columns=["length"], alpha=0.5).fit_transform(df) + # array([[50.10152483, 3.87905643], + # [50.26253897, 19.59684308], + # [52.66084873, 28.06719867]]) + ``` """ def __init__(self, columns, alpha=1): diff --git a/sklego/preprocessing/repeatingbasis.py b/sklego/preprocessing/repeatingbasis.py index 5f83be109..5bcb1b9f4 100644 --- a/sklego/preprocessing/repeatingbasis.py +++ b/sklego/preprocessing/repeatingbasis.py @@ -41,6 +41,22 @@ class RepeatingBasisFunction(TransformerMixin, BaseEstimator): ---------- pipeline_ : ColumnTransformer Fitted `ColumnTransformer` object used to transform data with repeating basis functions. + + Examples + -------- + ```py + import pandas as pd + from sklego.preprocessing import RepeatingBasisFunction + + df = pd.DataFrame({ + "user_id": [101, 102, 103], + "created_day": [5, 1, 7] + }) + RepeatingBasisFunction(column="created_day", n_periods=5, input_range=(1, 7)).fit_transform(df) + # array([[0.06217652, 0.00432024, 0.16901332, 0.89483932, 0.64118039], + # [1. , 0.36787944, 0.01831564, 0.01831564, 0.36787944], + # [1. , 0.36787944, 0.01831564, 0.01831564, 0.36787944]]) + ``` """ def __init__(self, column=0, remainder="drop", n_periods=12, input_range=None, width=1.0):