Commit 03e5dbe
update default example
csinva committed Jul 3, 2022
1 parent 2f16302 commit 03e5dbe
Showing 5 changed files with 81 additions and 39 deletions.
39 changes: 28 additions & 11 deletions docs/index.html
@@ -37,20 +37,37 @@
</p>
<p><img align="center" width=100% src="https://csinva.io/imodels/img/anim.gif"> </img></p>
<p>Modern machine-learning models are increasingly complex, often making them difficult to interpret. This package provides a simple interface for fitting and using state-of-the-art interpretable models, all compatible with scikit-learn. These models can often replace black-box models (e.g. random forests) with simpler models (e.g. rule lists) while improving interpretability and computational efficiency, all without sacrificing predictive accuracy! Simply import a classifier or regressor and use the <code>fit</code> and <code>predict</code> methods, same as standard scikit-learn models.</p>
<pre><code class="language-python">from imodels import BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier
from imodels import RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor
<pre><code class="language-python">from sklearn.model_selection import train_test_split
from imodels import (get_clean_dataset,
    BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier,
    RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor)

model = BoostedRulesClassifier() # initialize a model
model.fit(X_train, y_train) # fit model
preds = model.predict(X_test) # predictions: shape is (n_test, 1)
# prepare data (a sample clinical dataset)
X, y, feature_names = get_clean_dataset('csi_pecarn_pred')
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=42)

# fit the model
model = HSTreeRegressorCV(max_leaf_nodes=4) # initialize a tree model and specify only 4 leaf nodes
model.fit(X_train, y_train, feature_names=feature_names) # fit model
preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
print(model) # print the rule-based model
print(model) # print the model

-----------------------------
# the model consists of the following 3 rules
# if X1 &gt; 5: then 80.5% risk
# else if X2 &gt; 5: then 40% risk
# else: 10% risk
&gt; ------------------------------
&gt; Decision Tree with Hierarchical Shrinkage
&gt; Prediction is made by looking at the value in the appropriate leaf of the tree
&gt; ------------------------------
|--- FocalNeuroFindings2 &lt;= 0.50
| |--- HighriskDiving &lt;= 0.50
| | |--- Torticollis2 &lt;= 0.50
| | | |--- value: [0.10]
| | |--- Torticollis2 &gt; 0.50
| | | |--- value: [0.30]
| |--- HighriskDiving &gt; 0.50
| | |--- value: [0.68]
|--- FocalNeuroFindings2 &gt; 0.50
| |--- value: [0.42]
</code></pre>
<h3 id="installation">Installation</h3>
<p>Install with <code>pip install <a title="imodels" href="#imodels">imodels</a></code> (see <a href="https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md">here</a> for help). </p>
26 changes: 16 additions & 10 deletions docs/tree/hierarchical_shrinkage.html
@@ -80,6 +80,8 @@
&#39;shrinkage_scheme_&#39;: self.shrinkage_scheme_}

def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop(&#39;feature_names&#39;, None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)
@@ -249,7 +251,7 @@
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


class HSTreeRegressorCV(HSTreeRegressor):
@@ -285,14 +287,14 @@
# raise Warning(&#39;Passed an already fitted estimator,&#39;
# &#39;but shrinking not applied until fit method is called.&#39;)

def fit(self, X, y):
def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


if __name__ == &#39;__main__&#39;:
@@ -412,6 +414,8 @@ <h2 id="params">Params</h2>
&#39;shrinkage_scheme_&#39;: self.shrinkage_scheme_}

def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop(&#39;feature_names&#39;, None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)
@@ -547,6 +551,8 @@ <h3>Methods</h3>
<span>Expand source code</span>
</summary>
<pre><code class="python">def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop(&#39;feature_names&#39;, None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)</code></pre>
@@ -716,7 +722,7 @@ <h2 id="params">Params</h2>
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
<h3>Ancestors</h3>
<ul class="hlist">
@@ -742,7 +748,7 @@ <h3>Methods</h3>
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
</dd>
</dl>
@@ -838,14 +844,14 @@ <h2 id="params">Params</h2>
# raise Warning(&#39;Passed an already fitted estimator,&#39;
# &#39;but shrinking not applied until fit method is called.&#39;)

def fit(self, X, y):
def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
<h3>Ancestors</h3>
<ul class="hlist">
@@ -856,22 +862,22 @@ <h3>Ancestors</h3>
<h3>Methods</h3>
<dl>
<dt id="imodels.tree.hierarchical_shrinkage.HSTreeRegressorCV.fit"><code class="name flex">
<span>def <span class="ident">fit</span></span>(<span>self, X, y)</span>
<span>def <span class="ident">fit</span></span>(<span>self, X, y, *args, **kwargs)</span>
</code></dt>
<dd>
<div class="desc"></div>
<details class="source">
<summary>
<span>Expand source code</span>
</summary>
<pre><code class="python">def fit(self, X, y):
<pre><code class="python">def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)</code></pre>
super().fit(X=X, y=y, *args, **kwargs)</code></pre>
</details>
</dd>
</dl>
8 changes: 5 additions & 3 deletions imodels/tree/hierarchical_shrinkage.py
@@ -57,6 +57,8 @@ def get_params(self, deep=True):
'shrinkage_scheme_': self.shrinkage_scheme_}

def fit(self, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
self.feature_names = kwargs.pop('feature_names', None) # None returned if not passed
self.estimator_.fit(*args, **kwargs)
self._shrink()
self.complexity_ = compute_tree_complexity(self.estimator_.tree_)
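The new `feature_names` handling works because `dict.pop` both reads and removes the key, so the wrapped estimator never receives an argument it does not accept, and it only takes effect when `feature_names` is passed as a keyword argument. A minimal standalone sketch of that pattern (the `Wrapper` class here is purely illustrative, not part of imodels):

```python
class Wrapper:
    """Illustrative stand-in for an estimator wrapper that forwards fit kwargs."""

    def fit(self, X, y, **kwargs):
        # pop() removes 'feature_names' so downstream code never sees it;
        # the default None is used when the caller did not pass it
        self.feature_names = kwargs.pop('feature_names', None)
        self.remaining_kwargs = kwargs  # whatever is left would be forwarded
        return self

w = Wrapper().fit([[0, 1]], [0], feature_names=['age', 'bmi'])
print(w.feature_names)     # ['age', 'bmi']
print(w.remaining_kwargs)  # {} -- feature_names was consumed by pop()
```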
@@ -226,7 +228,7 @@ def fit(self, X, y, *args, **kwargs):
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


class HSTreeRegressorCV(HSTreeRegressor):
@@ -262,14 +264,14 @@ def __init__(self, estimator_: BaseEstimator = None,
# raise Warning('Passed an already fitted estimator,'
# 'but shrinking not applied until fit method is called.')

def fit(self, X, y):
def fit(self, X, y, *args, **kwargs):
self.scores_ = []
for reg_param in self.reg_param_list:
est = HSTreeRegressor(deepcopy(self.estimator_), reg_param)
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y)
super().fit(X=X, y=y, *args, **kwargs)


if __name__ == '__main__':
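Both CV classes follow the same selection loop: score each candidate `reg_param` with `cross_val_score`, keep the best-scoring one, then delegate to the parent `fit`, which now also forwards `*args`/`**kwargs` (e.g. `feature_names`). A rough standalone analogue of that selection loop, using scikit-learn's `Ridge` purely for illustration:

```python
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.linear_model import Ridge
from sklearn.model_selection import cross_val_score

X, y = load_diabetes(return_X_y=True)

# try each candidate regularization strength, keep the one with the best mean CV score
reg_param_list = [0.1, 1.0, 10.0, 100.0]
scores = [np.mean(cross_val_score(Ridge(alpha=a), X, y, cv=3)) for a in reg_param_list]
best_alpha = reg_param_list[int(np.argmax(scores))]

# refit on all of the training data with the selected parameter
final_model = Ridge(alpha=best_alpha).fit(X, y)
print(f'selected alpha={best_alpha}, mean CV R^2={max(scores):.3f}')
```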
45 changes: 31 additions & 14 deletions readme.md
@@ -25,25 +25,42 @@
Modern machine-learning models are increasingly complex, often making them difficult to interpret. This package provides a simple interface for fitting and using state-of-the-art interpretable models, all compatible with scikit-learn. These models can often replace black-box models (e.g. random forests) with simpler models (e.g. rule lists) while improving interpretability and computational efficiency, all without sacrificing predictive accuracy! Simply import a classifier or regressor and use the `fit` and `predict` methods, same as standard scikit-learn models.

```python
from imodels import BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier
from imodels import RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor

model = BoostedRulesClassifier() # initialize a model
model.fit(X_train, y_train) # fit model
preds = model.predict(X_test) # predictions: shape is (n_test, 1)
from sklearn.model_selection import train_test_split
from imodels import (get_clean_dataset,
    BoostedRulesClassifier, FIGSClassifier, SkopeRulesClassifier,
    RuleFitRegressor, HSTreeRegressorCV, SLIMRegressor)

# prepare data (a sample clinical dataset)
X, y, feature_names = get_clean_dataset('csi_pecarn_pred')
X_train, X_test, y_train, y_test = train_test_split(
X, y, random_state=42)

# fit the model
model = HSTreeRegressorCV(max_leaf_nodes=4) # initialize a tree model and specify only 4 leaf nodes
model.fit(X_train, y_train, feature_names=feature_names) # fit model
preds = model.predict(X_test) # discrete predictions: shape is (n_test, 1)
preds_proba = model.predict_proba(X_test) # predicted probabilities: shape is (n_test, n_classes)
print(model) # print the rule-based model

-----------------------------
# the model consists of the following 3 rules
# if X1 > 5: then 80.5% risk
# else if X2 > 5: then 40% risk
# else: 10% risk
print(model) # print the model

> ------------------------------
> Decision Tree with Hierarchical Shrinkage
> Prediction is made by looking at the value in the appropriate leaf of the tree
> ------------------------------
|--- FocalNeuroFindings2 <= 0.50
| |--- HighriskDiving <= 0.50
| | |--- Torticollis2 <= 0.50
| | | |--- value: [0.10]
| | |--- Torticollis2 > 0.50
| | | |--- value: [0.30]
| |--- HighriskDiving > 0.50
| | |--- value: [0.68]
|--- FocalNeuroFindings2 > 0.50
| |--- value: [0.42]
```
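
The fitted model can also be scored like any other scikit-learn estimator; a minimal follow-on sketch, reusing the variables from the snippet above and assuming `roc_auc_score` is an appropriate metric for this binary outcome:

```python
from sklearn.metrics import roc_auc_score

# score the held-out predictions produced above
auc = roc_auc_score(y_test, preds_proba[:, 1])  # probability of the positive class
print(f'test ROC AUC: {auc:.3f}')
```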

### Installation

Install with `pip install imodels` (see [here](https://github.com/csinva/imodels/blob/master/docs/troubleshooting.md) for help).

### Supported models

2 changes: 1 addition & 1 deletion setup.py
@@ -26,7 +26,7 @@

setuptools.setup(
name="imodels",
version="1.3.1",
version="1.3.2",
author="Chandan Singh, Keyan Nasseri, Bin Yu, and others",
author_email="chandan_singh@berkeley.edu",
description="Implementations of various interpretable models",
