Commit 03e5dbe
update default example
csinva committed Jul 3, 2022
1 parent 2f16302 commit 03e5dbe
Showing 5 changed files with 81 additions and 39 deletions.
39 changes: 28 additions & 11 deletions docs/index.html
@@ -37,20 +37,37 @@
</p>
<p><img align="center" width=100% src="https://csinva.io/imodels/img/anim.gif"> </img></p>
<p>Modern machine-learning models are increasingly complex, often making them difficult to interpret. This package provides a simple interface for fitting and using state-of-the-art interpretable models, all compatible with scikit-learn. These models can often replace black-box models (e.g. random forests) with simpler models (e.g. rule lists) while improving interpretability and computational efficiency, all without sacrificing predictive accuracy! Simply import a classifier or regressor and use the <code>fit</code> and <code>predict</code> methods, same as standard scikit-learn models.</p>
<pre><code class="language-python">from imodels import BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier
from imodels import RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor
<pre><code class="language-python">from sklearn.model_selection import train_test_split
from imodels import (get_clean_dataset,
    BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier,
    RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor)

model = BoostedRulesClassifier() # initialize a model
model.fit(X_train, y_train) # fit model
preds = model.predict(X_test) # predictions: shape is (n_test, 1)
# prepare data (a sample clinical dataset)
X, y, feature_names = get_clean_dataset('csi_pecarn_pred')
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=42)

# fit the model
model = HSTreeRegressorCV(max_leaf_nodes=4) # initialize a tree model and specify only 4 leaf nodes
model.fit(X_train, y_train, feature_names=feature_names) # fit model
preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
print(model) # print the rule-based model
print(model) # print the model

-----------------------------
# the model consists of the following 3 rules
# if X1 &gt; 5: then 80.5% risk
# else if X2 &gt; 5: then 40% risk
# else: 10% risk
&gt; ------------------------------
&gt; Decision Tree with Hierarchical Shrinkage
&gt; Prediction is made by looking at the value in the appropriate leaf of the tree
&gt; ------------------------------
|--- FocalNeuroFindings2 &lt;= 0.50
| |--- HighriskDiving &lt;= 0.50
| | |--- Torticollis2 &lt;= 0.50
| | | |--- value: [0.10]
| | |--- Torticollis2 &gt; 0.50
| | | |--- value: [0.30]
| |--- HighriskDiving &gt; 0.50
| | |--- value: [0.68]
|--- FocalNeuroFindings2 &gt; 0.50
| |--- value: [0.42]
</code></pre>
<h3 id="installation">Installation</h3>
<p>Install with <code>pip install <a title="imodels" href="#imodels">imodels</a></code> (see <a href="https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md">here</a> for help). </p>
26 changes: 16 additions & 10 deletions docs/tree/hierarchical_shrinkage.html
@@ -80,6 +80,8 @@
&#39;shrinkage_scheme_&#39;: self.shrinkage_scheme_}

def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop(&#39;feature_names&#39;, None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)
@@ -249,7 +251,7 @@
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


class HSTreeRegressorCV(HSTreeRegressor):
@@ -285,14 +287,14 @@
# raise Warning(&#39;Passed an already fitted estimator,&#39;
# &#39;but shrinking not applied until fit method is called.&#39;)

def fit(self, X, y):
def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


if __name__ == &#39;__main__&#39;:
@@ -412,6 +414,8 @@ <h2 id="params">Params</h2>
&#39;shrinkage_scheme_&#39;: self.shrinkage_scheme_}

def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop(&#39;feature_names&#39;, None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)
@@ -547,6 +551,8 @@ <h3>Methods</h3>
<span>Expand source code</span>
</summary>
<pre><code class="python">def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop(&#39;feature_names&#39;, None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)</code></pre>
@@ -716,7 +722,7 @@ <h2 id="params">Params</h2>
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
<h3>Ancestors</h3>
<ul class="hlist">
@@ -742,7 +748,7 @@ <h3>Methods</h3>
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
</dd>
</dl>
@@ -838,14 +844,14 @@ <h2 id="params">Params</h2>
# raise Warning(&#39;Passed an already fitted estimator,&#39;
# &#39;but shrinking not applied until fit method is called.&#39;)

def fit(self, X, y):
def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
<h3>Ancestors</h3>
<ul class="hlist">
@@ -856,22 +862,22 @@ <h3>Ancestors</h3>
<h3>Methods</h3>
<dl>
<dt id="imodels.tree.hierarchical_shrinkage.HSTreeRegressorCV.fit"><code class="name flex">
<span>def <span class="ident">fit</span></span>(<span>self, X, y)</span>
<span>def <span class="ident">fit</span></span>(<span>self, X, y, *args, **kwargs)</span>
</code></dt>
<dd>
<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def fit(self, X, y):
<pre><code class="python">def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
</dd>
</dl>
8 changes: 5 additions & 3 deletions imodels/tree/hierarchical_shrinkage.py
@@ -57,6 +57,8 @@ def get_params(self, deep=True):
'shrinkage_scheme_': self.shrinkage_scheme_}

def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop('feature_names', None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)
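The new `feature_names` handling works because `dict.pop` both reads and removes the key, so the wrapped estimator never receives an argument it does not accept, and it only takes effect when `feature_names` is passed as a keyword argument. A minimal standalone sketch of that pattern (the `Wrapper` class here is purely illustrative, not part of imodels):

```python
class Wrapper:
    """Illustrative stand-in for an estimator wrapper that forwards fit kwargs."""

    def fit(self, X, y, **kwargs):
        # pop() removes 'feature_names' so downstream code never sees it;
        # the default None is used when the caller did not pass it
        self.feature_names = kwargs.pop('feature_names', None)
        self.remaining_kwargs = kwargs  # whatever is left would be forwarded
        return self

w = Wrapper().fit([[0, 1]], [0], feature_names=['age', 'bmi'])
print(w.feature_names)     # ['age', 'bmi']
print(w.remaining_kwargs)  # {} -- feature_names was consumed by pop()
```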
@@ -226,7 +228,7 @@ def fit(self, X, y, *args, **kwargs):
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


class HSTreeRegressorCV(HSTreeRegressor):
@@ -262,14 +264,14 @@ def __init__(self, estimator_: BaseEstimator = None,
# raise Warning('Passed an already fitted estimator,'
# 'but shrinking not applied until fit method is called.')

def fit(self, X, y):
def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


if __name__ == '__main__':
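Both CV classes follow the same selection loop: score each candidate `reg_param` with `cross_val_score`, keep the best-scoring one, then delegate to the parent `fit`, which now also forwards `*args`/`**kwargs` (e.g. `feature_names`). A rough standalone analogue of that selection loop, using scikit-learn's `Ridge` purely for illustration:

```python
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

X, y = load_diabetes(return_X_y=True)

# try each candidate regularization strength, keep the one with the best mean CV score
reg_param_list = [0.1, 1.0, 10.0, 100.0]
scores = [np.mean(cross_val_score(Ridge(alpha=a), X, y, cv=3)) for a in reg_param_list]
best_alpha = reg_param_list[int(np.argmax(scores))]

# refit on all of the training data with the selected parameter
final_model = Ridge(alpha=best_alpha).fit(X, y)
print(f'selected alpha={best_alpha}, mean CV R^2={max(scores):.3f}')
```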
45 changes: 31 additions & 14 deletions readme.md
@@ -25,25 +25,42 @@
Modern machine-learning models are increasingly complex, often making them difficult to interpret. This package provides a simple interface for fitting and using state-of-the-art interpretable models, all compatible with scikit-learn. These models can often replace black-box models (e.g. random forests) with simpler models (e.g. rule lists) while improving interpretability and computational efficiency, all without sacrificing predictive accuracy! Simply import a classifier or regressor and use the `fit` and `predict` methods, same as standard scikit-learn models.

```python
from imodels import BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier
from imodels import RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor

model = BoostedRulesClassifier() # initialize a model
model.fit(X_train, y_train) # fit model
preds = model.predict(X_test) # predictions: shape is (n_test, 1)
from sklearn.model_selection import train_test_split
from imodels import (get_clean_dataset,
    BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier,
    RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor)

# prepare data (a sample clinical dataset)
X, y, feature_names = get_clean_dataset('csi_pecarn_pred')
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=42)

# fit the model
model = HSTreeRegressorCV(max_leaf_nodes=4) # initialize a tree model and specify only 4 leaf nodes
model.fit(X_train, y_train, feature_names=feature_names) # fit model
preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
print(model) # print the rule-based model

-----------------------------
# the model consists of the following 3 rules
# if X1 > 5: then 80.5% risk
# else if X2 > 5: then 40% risk
# else: 10% risk
print(model) # print the model

> ------------------------------
> Decision Tree with Hierarchical Shrinkage
> Prediction is made by looking at the value in the appropriate leaf of the tree
> ------------------------------
|--- FocalNeuroFindings2 <= 0.50
| |--- HighriskDiving <= 0.50
| | |--- Torticollis2 <= 0.50
| | | |--- value: [0.10]
| | |--- Torticollis2 > 0.50
| | | |--- value: [0.30]
| |--- HighriskDiving > 0.50
| | |--- value: [0.68]
|--- FocalNeuroFindings2 > 0.50
| |--- value: [0.42]
```
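
The fitted model can also be scored like any other scikit-learn estimator; a minimal follow-on sketch, reusing the variables from the snippet above and assuming `roc_auc_score` is an appropriate metric for this binary outcome:

```python
from sklearn.metrics import roc_auc_score

# score the held-out predictions produced above
auc = roc_auc_score(y_test, preds_proba[:, 1])  # probability of the positive class
print(f'test ROC AUC: {auc:.3f}')
```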

### Installation

Install with `pip install imodels` (see [here](https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md) for help).

### Supported models

2 changes: 1 addition & 1 deletion setup.py
@@ -26,7 +26,7 @@

setuptools.setup(
name="imodels",
version="1.3.1",
version="1.3.2",
author="Chandan Singh, Keyan Nasseri, Bin Yu, and others",
author_email="chandan_singh@berkeley.edu",
description="Implementations of various interpretable models",
