Merge pull request #150 from mepland/figs_sigmoid_output
FIGSClassifier: use sigmoid in predict_proba
csinva authored Dec 28, 2022
2 parents 9d208dc + 235eb36 commit c77ecb9
Showing 4 changed files with 718 additions and 707 deletions.
18 changes: 15 additions & 3 deletions imodels/tree/figs.py
@@ -4,6 +4,7 @@
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.special import expit
import sklearn.datasets
from sklearn import datasets
from sklearn import tree
@@ -367,7 +368,8 @@ def _tree_to_str_with_data(self, X, y, root: Node, prefix=''):
def __str__(self):
s = '> ------------------------------\n'
s += '> FIGS-Fast Interpretable Greedy-Tree Sums:\n'
s += '> \tPredictions are made by summing the "Val" reached by traversing each tree\n'
s += '> \tPredictions are made by summing the "Val" reached by traversing each tree.\n'
s += '> \tFor classifiers, a sigmoid function is then applied to the sum.\n'
s += '> ------------------------------\n'
s += '\n\t+\n'.join([self._tree_to_str(t) for t in self.trees_])
if hasattr(self, 'feature_names_') and self.feature_names_ is not None:
@@ -397,7 +399,11 @@ def predict(self, X, categorical_features=None):
elif isinstance(self, ClassifierMixin):
return (preds > 0.5).astype(int)

def predict_proba(self, X, categorical_features=None):
def predict_proba(self, X, categorical_features=None, use_clipped_prediction=False):
"""Predict probability for classifiers:
Default behavior is to constrain the outputs to the range of probabilities, i.e. 0 to 1, with a sigmoid function.
Set use_clipped_prediction=True to use prior behavior of clipping between 0 and 1 instead.
"""
if hasattr(self, "_encoder"):
X = self._encode_categories(X, categorical_features=categorical_features)
X = check_array(X)
@@ -406,7 +412,13 @@ def predict_proba(self, X, categorical_features=None):
preds = np.zeros(X.shape[0])
for tree in self.trees_:
preds += self._predict_tree(tree, X)
preds = np.clip(preds, a_min=0., a_max=1.) # constrain to range of probabilities
if use_clipped_prediction:
# old behavior, pre v1.3.9
# constrain to range of probabilities by clipping
preds = np.clip(preds, a_min=0., a_max=1.)
else:
# constrain to range of probabilities with a sigmoid function
preds = expit(preds)
return np.vstack((1 - preds, preds)).transpose()

def _predict_tree(self, root: Node, X):
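As a quick illustration of what this change does (a standalone sketch, not part of the commit; the raw_sums values below are made up for illustration), here is how the old clipping path and the new sigmoid path map the same raw tree sums to probabilities:

import numpy as np
from scipy.special import expit  # logistic sigmoid, as imported in figs.py

# hypothetical raw sums of the "Val" leaves over all trees for three samples
raw_sums = np.array([-2.0, 0.3, 1.7])

# old behavior (pre v1.3.9): clip the sums into [0, 1]
clipped = np.clip(raw_sums, a_min=0., a_max=1.)  # -> [0.    0.3   1.   ]

# new default behavior: squash the sums through a sigmoid
squashed = expit(raw_sums)                       # -> [0.119 0.574 0.846]

# either way, predict_proba then stacks (P(y=0), P(y=1)) as columns
proba_new = np.vstack((1 - squashed, squashed)).transpose()

Note how the sigmoid keeps samples with extreme sums distinguishable and strictly inside (0, 1) instead of saturating them at exactly 0 or 1, which is presumably the motivation for making it the default.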
1,371 changes: 690 additions & 681 deletions notebooks/FIGS_viz_demo.ipynb

Large diffs are not rendered by default.

34 changes: 12 additions & 22 deletions notebooks/FIGS_viz_demo.py
@@ -1,18 +1,4 @@
# ---
# jupyter:
# jupytext:
# text_representation:
# extension: .py
# format_name: percent
# format_version: '1.3'
# jupytext_version: 1.14.1
# kernelspec:
# display_name: Python 3 (ipykernel)
# language: python
# name: python3
# ---

# %% [markdown] tags=[] jp-MarkdownHeadingCollapsed=true tags=[]
# %% [markdown]
# # Setup

# %%
@@ -25,17 +11,21 @@
from sklearn.tree import plot_tree, DecisionTreeClassifier
from sklearn import metrics

# TODO: remove when package is updated
import sys,os
sys.path.append(os.path.expanduser('~/imodels'))

# installable with: `pip install imodels`
import imodels
from imodels import FIGSClassifier
import demo_helper
np.random.seed(13)

# %% [markdown] pycharm={"name": "#%% md\n"}
# %% [markdown]
# Let's start by loading some data in...
# Note: we still need to load the regression dataset first to get the same splits as in `imodels_demo.ipynb`, since the splits depend on the order of calls to np.random

# %% jupyter={"outputs_hidden": false} pycharm={"name": "#%%\n"}
# %%
# ames housing dataset: https://www.openml.org/search?type=data&status=active&id=43926
X_train_reg, X_test_reg, y_train_reg, y_test_reg, feat_names_reg = demo_helper.get_ames_data()

@@ -49,14 +39,14 @@
# load some data
# print('Regression data training', X_train_reg.shape, 'Classification data training', X_train.shape)

# %% [markdown] tags=[]
# %% [markdown]
# ***
# # FIGS

# %%
model_figs = FIGSClassifier(max_rules=7)

# %% jupyter={"outputs_hidden": false} pycharm={"name": "#%%\n"}
# %%
# fit the FIGS model (max_rules=7 above caps the total number of rules across its trees)
model_figs.fit(X_train, y_train, feature_names=feat_names);

@@ -69,7 +59,7 @@
# %%
model_figs.plot(fig_size=7)

# %% [markdown] tags=[]
# %% [markdown]
# ***
# # `dtreeviz` Integration
# One tree at a time only, showing tree 0 here
@@ -111,7 +101,7 @@
# # `SKompiler` Integration
# One tree at a time only, showing tree 0 here

# %% tags=[]
# %%
from skompiler import skompile
from imodels.tree.viz_utils import extract_sklearn_tree_from_figs

Expand All @@ -121,5 +111,5 @@
# %%
print(expr.to('sqlalchemy/sqlite', component=1, assign_to='tree_0'))

# %% tags=[]
# %%
print(expr.to('python/code'))
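With this commit, the demo model above exposes both probability paths; a minimal usage sketch (assuming the fitted model_figs and the X_test split from earlier in this notebook are in scope):

probs_sigmoid = model_figs.predict_proba(X_test)  # new default: sigmoid of the summed tree values
probs_clipped = model_figs.predict_proba(X_test, use_clipped_prediction=True)  # old pre-v1.3.9 clipping
print(probs_sigmoid[:5])
print(probs_clipped[:5])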
2 changes: 1 addition & 1 deletion notebooks/demo_helper.py
@@ -31,7 +31,7 @@ def viz_classification_preds(probs, y_test):


def get_ames_data():
housing = fetch_openml(name="house_prices", as_frame=True)
housing = fetch_openml(name="house_prices", as_frame=True, parser='auto')
housing_target = housing['target'].values
housing_data_numeric = housing['data'].select_dtypes('number').drop(columns=['Id']).dropna(axis=1)
feature_names = housing_data_numeric.columns.values
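For context on this one-line change (my reading, not stated in the commit): recent scikit-learn releases added a parser argument to fetch_openml and emit a FutureWarning when it is left unset, so passing parser='auto' keeps the demo warning-free:

from sklearn.datasets import fetch_openml

# 'auto' lets scikit-learn choose the ARFF parser (pandas-based where possible)
# instead of warning about the upcoming change to the default parser
housing = fetch_openml(name="house_prices", as_frame=True, parser='auto')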
