Skip to content

Commit

Permalink
Remove deprecated code (#626)
Browse files Browse the repository at this point in the history
* remove deprecated but patsy

* patsy removal and formulaic docs

* docs

* import fix
  • Loading branch information
FBruzzesi authored Mar 15, 2024
1 parent b7e9f77 commit 47c29a5
Show file tree
Hide file tree
Showing 26 changed files with 64 additions and 397 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/dependencies.yml
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ jobs:
python -m pip install -e ".[all]"
- name: Run Checks
run: |
python tests/scripts/check_pip.py installed cvxpy formulaic patsy scikit-learn umap-learn
python tests/scripts/check_pip.py installed cvxpy formulaic scikit-learn umap-learn
- name: Docs can Build
run: |
sudo apt-get update && sudo apt-get install pandoc
Expand Down
31 changes: 21 additions & 10 deletions docs/_scripts/preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,12 +118,12 @@
# --8<-- [end:column-capper-inf]


######################################## Patsy ###########################################
######################################## Formulaic #######################################
##########################################################################################

# --8<-- [start:patsy-1]
# --8<-- [start:formulaic-1]
import pandas as pd
from sklego.preprocessing import PatsyTransformer
from sklego.preprocessing import FormulaicTransformer

df = pd.DataFrame({
"a": [1, 2, 3, 4, 5],
Expand All @@ -132,15 +132,26 @@
})
X, y = df[["a", "b"]], df[["y"]].to_numpy()

pt = PatsyTransformer("a + np.log(a) + b")
pt.fit(X, y).transform(X)
# --8<-- [end:patsy-1]
formulaic_transformer = FormulaicTransformer(
formula="a + np.log(a) + b",
return_type="pandas"
)
formulaic_transformer.fit(X, y).transform(X)
# --8<-- [end:formulaic-1]

with open(_static_path / "formulaic-1.md", "w") as f:
f.write(formulaic_transformer.fit(X, y).transform(X).head().to_markdown(index=False))

# --8<-- [start:patsy-2]
pt = PatsyTransformer("a + np.log(a) + b - 1")
pt.fit(X, y).transform(X)
# --8<-- [end:patsy-2]
# --8<-- [start:formulaic-2]
formulaic_transformer = FormulaicTransformer(
formula="a + np.log(a) + b - 1",
return_type="pandas"
)
formulaic_transformer.fit(X, y).transform(X)
# --8<-- [end:formulaic-2]

with open(_static_path / "formulaic-2.md", "w") as f:
f.write(formulaic_transformer.fit(X, y).transform(X).head().to_markdown(index=False))

######################################## RBF ###########################################
##########################################################################################
Expand Down
7 changes: 7 additions & 0 deletions docs/_static/preprocessing/formulaic-1.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
| Intercept | a | np.log(a) | b[T.no] | b[T.yes] |
|------------:|----:|------------:|----------:|-----------:|
| 1 | 1 | 0 | 0 | 1 |
| 1 | 2 | 0.693147 | 0 | 1 |
| 1 | 3 | 1.09861 | 1 | 0 |
| 1 | 4 | 1.38629 | 0 | 0 |
| 1 | 5 | 1.60944 | 0 | 1 |
7 changes: 7 additions & 0 deletions docs/_static/preprocessing/formulaic-2.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
| a | np.log(a) | b[T.maybe] | b[T.no] | b[T.yes] |
|----:|------------:|-------------:|----------:|-----------:|
| 1 | 0 | 0 | 0 | 1 |
| 2 | 0.693147 | 0 | 0 | 1 |
| 3 | 1.09861 | 0 | 1 | 0 |
| 4 | 1.38629 | 1 | 0 | 0 |
| 5 | 1.60944 | 0 | 0 | 1 |
Binary file modified docs/_static/preprocessing/interval-encoder-1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/preprocessing/interval-encoder-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/preprocessing/interval-encoder-3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/preprocessing/monotonic-2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/preprocessing/monotonic-3.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/preprocessing/rbf-data.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/preprocessing/rbf-plot.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/_static/preprocessing/rbf-regr.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 0 additions & 6 deletions docs/api/features-selection.md

This file was deleted.

8 changes: 4 additions & 4 deletions docs/api/preprocessing.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,22 @@
show_root_full_path: true
show_root_heading: true

:::sklego.preprocessing.projections.OrthogonalTransformer
:::sklego.preprocessing.formulaictransformer.FormulaicTransformer
options:
show_root_full_path: true
show_root_heading: true

:::sklego.preprocessing.outlier_remover.OutlierRemover
:::sklego.preprocessing.projections.OrthogonalTransformer
options:
show_root_full_path: true
show_root_heading: true

:::sklego.preprocessing.pandastransformers.PandasTypeSelector
:::sklego.preprocessing.outlier_remover.OutlierRemover
options:
show_root_full_path: true
show_root_heading: true

:::sklego.preprocessing.patsytransformer.PatsyTransformer
:::sklego.preprocessing.pandastransformers.PandasTypeSelector
options:
show_root_full_path: true
show_root_heading: true
Expand Down
5 changes: 1 addition & 4 deletions docs/installation.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,13 @@ Install **scikit-lego**:

Some functionality can only be used if certain dependencies are installed. This can be done by specifying the extra dependencies in square brackets after the package name.

Currently supported extras are [**cvxpy**][cvxpy], [**formulaic**][formulaic], [**patsy**][patsy] and [**umap**][umap]. You can specify these as follows:
Currently supported extras are [**cvxpy**][cvxpy], [**formulaic**][formulaic] and [**umap**][umap]. You can specify these as follows:

=== "pip"

```bash
python -m pip install scikit-lego"[cvxpy]"
python -m pip install scikit-lego"[formulaic]"
python -m pip install scikit-lego"[patsy]"
python -m pip install scikit-lego"[umap]"
python -m pip install scikit-lego"[all]"
```
Expand All @@ -57,12 +56,10 @@ Currently supported extras are [**cvxpy**][cvxpy], [**formulaic**][formulaic], [

python -m pip install ".[cvxpy]"
python -m pip install ".[formulaic]"
python -m pip install ."[patsy]"
python -m pip install ".[umap]"
python -m pip install ".[all]"
```

[cvxpy]: https://www.cvxpy.org/
[formulaic]: https://matthewwardrop.github.io/formulaic/
[patsy]: https://patsy.readthedocs.io/en/latest/
[umap]: https://umap-learn.readthedocs.io/en/latest/index.html
49 changes: 12 additions & 37 deletions docs/user-guide/preprocessing.md
Original file line number Diff line number Diff line change
Expand Up @@ -114,60 +114,35 @@ Let's demonstrate how [`ColumnCapper`][column-capper-api] works in a few example
[0.10029693, 0.89859006]])
```

## Patsy Formulas
## Formulaic (Wilkinson formulas)

If you're used to the statistical programming language R you might have seen a formula object before. This is an object that represents a shorthand way to design variables used in a statistical model.

The [patsy][patsy-docs] python project took this idea and made it available for python. From sklego we've made a wrapper, called [`PatsyTransformer`][patsy-api], such that you can also use these in your pipelines.
The [formulaic][formulaic-docs] project took this idea and made it available for Python. In sklego we provide a wrapper, called [`FormulaicTransformer`][formulaic-api], so that you can also use these formulas in your pipelines.

```py
--8<-- "docs/_scripts/preprocessing.py:patsy-1"
--8<-- "docs/_scripts/preprocessing.py:formulaic-1"
```

```console
DesignMatrix with shape (5, 5)
Intercept b[T.no] b[T.yes] a np.log(a)
1 0 1 1 0.00000
1 0 1 2 0.69315
1 1 0 3 1.09861
1 0 0 4 1.38629
1 0 1 5 1.60944
Terms:
'Intercept' (column 0)
'b' (columns 1:3)
'a' (column 3)
'np.log(a)' (column 4)
```
--8<-- "docs/_static/preprocessing/formulaic-1.md"

You might notice that the first column contains the constant array equal to one. You might also expect 3 dummy variable columns instead of 2.

This is because the design matrix from patsy attempts to keep the columns in the matrix linearly independent of each other.
This is because the design matrix from formulaic attempts to keep the columns in the matrix linearly independent of each other.

If you do not want this constant (intercept) column to be created, you can
omit it by adding "- 1" to the formula.

```py
--8<-- "docs/_scripts/preprocessing.py:patsy-2"
--8<-- "docs/_scripts/preprocessing.py:formulaic-2"
```

```console
DesignMatrix with shape (5, 5)
b[maybe] b[no] b[yes] a np.log(a)
0 0 1 1 0.00000
0 0 1 2 0.69315
0 1 0 3 1.09861
1 0 0 4 1.38629
0 0 1 5 1.60944
Terms:
'b' (columns 0:3)
'a' (column 3)
'np.log(a)' (column 4)
```
--8<-- "docs/_static/preprocessing/formulaic-2.md"

You'll notice that now the constant array is gone and it is replaced with a dummy array. Again this is now possible because patsy wants to guarantee that each column in this matrix is linearly independent of each other.
You'll notice that the constant array is now gone and has been replaced by an extra dummy column. This is possible because, with the intercept removed, formulaic can keep all three dummy columns while still guaranteeing that the columns of the matrix are linearly independent of each other.

The formula syntax is pretty powerful; if you'd like to learn more, we refer you
to [formulas][patsy-formulas] documentation.
to the [formulas][formulaic-formulas] documentation.

## Repeating Basis Function Transformer

Expand Down Expand Up @@ -282,10 +257,10 @@ If these features are now passed to a model that supports monotonicity constrain
[meta-module]: ../../api/meta
[id-transformer-api]: ../../api/preprocessing#sklego.preprocessing.identitytransformer.IdentityTransformer
[column-capper-api]: ../../api/preprocessing#sklego.preprocessing.columncapper.ColumnCapper
[patsy-api]: ../../api/preprocessing#sklego.preprocessing.patsytransformer.PatsyTransformer
[formulaic-api]: ../../api/preprocessing#sklego.preprocessing.formulaictransformer.FormulaicTransformer
[rbf-api]: ../../api/preprocessing#sklego.preprocessing.repeatingbasis.RepeatingBasisFunction
[interval-encoder-api]: ../../api/preprocessing#sklego.preprocessing.intervalencoder.IntervalEncoder
[decay-section]: ../../user-guide/meta#decayed-estimation

[patsy-docs]: https://patsy.readthedocs.io/en/latest/
[patsy-formulas]: https://patsy.readthedocs.io/en/latest/formulas.html
[formulaic-docs]: https://matthewwardrop.github.io/formulaic/
[formulaic-formulas]: https://matthewwardrop.github.io/formulaic/formulas/
4 changes: 1 addition & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@ maintainers = [
]

dependencies = [
"Deprecated>=1.2.6",
"pandas>=1.1.5",
"scikit-learn>=1.0",
"importlib-metadata >= 1.0; python_version < '3.8'",
Expand All @@ -46,10 +45,9 @@ documentation = "https://koaning.github.io/scikit-lego/"
[project.optional-dependencies]
cvxpy = ["cmake", "osqp", "cvxpy>=1.1.8"]
formulaic = ["formulaic>=0.6.0"]
patsy = ["patsy>=0.5.1"]
umap = ["umap-learn>=0.4.6"]

all = ["scikit-lego[cvxpy,formulaic,patsy,umap]"]
all = ["scikit-lego[cvxpy,formulaic,umap]"]

docs = [
"mkdocs>=1.5.3",
Expand Down
1 change: 0 additions & 1 deletion readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,6 @@ Here's a list of features that this library currently offers:
- `sklego.preprocessing.IdentityTransformer` returns the same data, allows for concatenating pipelines
- `sklego.preprocessing.OrthogonalTransformer` makes all features linearly independent
- `sklego.preprocessing.PandasTypeSelector` selects columns based on pandas type
- `sklego.preprocessing.PatsyTransformer` applies a [patsy](https://patsy.readthedocs.io/en/latest/formulas.html) formula
- `sklego.preprocessing.RandomAdder` adds randomness in training
- `sklego.preprocessing.RepeatingBasisFunction` repeating feature engineering, useful for timeseries
- `sklego.preprocessing.DictMapper` assign numeric values on categorical columns
Expand Down
10 changes: 0 additions & 10 deletions sklego/linear_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@

import numpy as np
import pandas as pd
from deprecated.sphinx import deprecated
from scipy.optimize import minimize
from scipy.special._ufuncs import expit
from sklearn.base import BaseEstimator, RegressorMixin
Expand Down Expand Up @@ -629,15 +628,6 @@ def __new__(cls, *args, multi_class="ovr", n_jobs=1, **kwargs):
return multiclass_meta(_DemographicParityClassifier(*args, **kwargs), n_jobs=n_jobs)


@deprecated(
version="0.4.0",
reason="Please use `sklego.linear_model.DemographicParityClassifier instead`",
)
class FairClassifier(DemographicParityClassifier):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)


class _DemographicParityClassifier(_FairClassifier):
"""Classifier for Demographic Parity fairness constraint.
Expand Down
2 changes: 0 additions & 2 deletions sklego/meta/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
"HierarchicalPredictor",
"HierarchicalRegressor",
"OrdinalClassifier",
"OutlierRemover",
"SubjectiveClassifier",
"Thresholder",
"RegressionOutlierDetector",
Expand All @@ -25,7 +24,6 @@
from sklego.meta.hierarchical_predictor import HierarchicalClassifier, HierarchicalPredictor, HierarchicalRegressor
from sklego.meta.ordinal_classification import OrdinalClassifier
from sklego.meta.outlier_classifier import OutlierClassifier
from sklego.meta.outlier_remover import OutlierRemover
from sklego.meta.regression_outlier_detector import RegressionOutlierDetector
from sklego.meta.subjective_classifier import SubjectiveClassifier
from sklego.meta.thresholder import Thresholder
Expand Down
72 changes: 0 additions & 72 deletions sklego/meta/outlier_remover.py

This file was deleted.

1 change: 0 additions & 1 deletion sklego/notinstalled.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"cvxpy": {"version": ">=1.0.24", "extra_name": "cvxpy"},
"umap-learn": {"version": ">=0.4.6", "extra_name": "umap"},
"formulaic": {"version": ">=0.6.0", "extra_name": "formulaic"},
"patsy": {"version": ">=0.5.1", "extra_name": "patsy"},
}


Expand Down
Loading

0 comments on commit 47c29a5

Please sign in to comment.