From 1df3dc09187cac12ed660f4a4013d657c0096cc7 Mon Sep 17 00:00:00 2001 From: Chandan Singh Date: Wed, 4 Oct 2023 19:20:29 -0700 Subject: [PATCH] add AutoIntepretableRegressor --- .../marginal_shrinkage_linear_model.html | 618 ++++++++++++++++-- docs/algebraic/tree_gam.html | 220 +++++-- docs/index.html | 2 +- docs/util/automl.html | 365 ++++++++--- imodels/util/automl.py | 121 +++- readme.md | 2 +- setup.py | 2 +- 7 files changed, 1122 insertions(+), 208 deletions(-) diff --git a/docs/algebraic/marginal_shrinkage_linear_model.html b/docs/algebraic/marginal_shrinkage_linear_model.html index c3318214..73ce0678 100644 --- a/docs/algebraic/marginal_shrinkage_linear_model.html +++ b/docs/algebraic/marginal_shrinkage_linear_model.html @@ -25,11 +25,10 @@ import numpy as np import pandas as pd from sklearn.base import BaseEstimator -from sklearn.linear_model import LinearRegression, RidgeCV, Ridge, ElasticNetCV +from sklearn.linear_model import LinearRegression, RidgeCV, Ridge, ElasticNet, ElasticNetCV from sklearn.tree import DecisionTreeRegressor from sklearn.utils.multiclass import check_classification_targets -from sklearn.utils.validation import check_X_y -from sklearn.utils.validation import _check_sample_weight +from sklearn.utils.validation import check_X_y, check_array, _check_sample_weight from sklearn.model_selection import train_test_split from sklearn.metrics import accuracy_score, roc_auc_score from tqdm import tqdm @@ -114,7 +113,8 @@ self.coef_marginal_ = self._fit_marginal(X, y, sample_weight) # fit main - self.est_main_ = self._fit_main(X, y, sample_weight, self.coef_marginal_) + self.est_main_ = self._fit_main( + X, y, sample_weight, self.coef_marginal_) return self @@ -133,7 +133,8 @@ else: coef_marginal_ = [] for i in range(X.shape[1]): - est_marginal.fit(X[:, i].reshape(-1, 1), y, sample_weight=sample_weight) + est_marginal.fit(X[:, i].reshape(-1, 1), y, + sample_weight=sample_weight) coef_marginal_.append(deepcopy(est_marginal.coef_)) coef_marginal_ = np.vstack(coef_marginal_).squeeze() @@ -223,12 +224,82 @@ # ... +class MarginalLinearModel(BaseEstimator): + """Linear model that only fits marginal effects of each feature. 
+ """ + + def __init__(self, alpha=1.0, l1_ratio=0.5, max_iter=1000, random_state=None): + '''Arguments are passed to sklearn.linear_model.ElasticNet + ''' + self.alpha = alpha + self.l1_ratio = l1_ratio + self.max_iter = max_iter + self.random_state = random_state + + def fit(self, X, y, sample_weight=None): + # checks + X, y = check_X_y(X, y, accept_sparse=False, multi_output=False) + sample_weight = _check_sample_weight(sample_weight, X, dtype=None) + if isinstance(self, ClassifierMixin): + check_classification_targets(y) + self.classes_, y = np.unique(y, return_inverse=True) + + # fit marginal estimator to each feature + coef_marginal_ = [] + for i in range(X.shape[1]): + est_marginal = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio, + max_iter=self.max_iter, random_state=self.random_state) + est_marginal.fit(X[:, i].reshape(-1, 1), y, + sample_weight=sample_weight) + coef_marginal_.append(deepcopy(est_marginal.coef_)) + coef_marginal_ = np.vstack(coef_marginal_).squeeze() + + self.coef_ = coef_marginal_ / X.shape[1] + self.alpha_ = self.alpha + + return self + + def predict_proba(self, X): + X = check_array(X, accept_sparse=False, dtype=None) + return X @ self.coef_ + + def predict(self, X): + probs = self.predict_proba(X) + if isinstance(self, ClassifierMixin): + return np.argmax(probs, axis=1) + else: + return probs + + +class MarginalLinearRegressor(MarginalLinearModel, RegressorMixin): + ... + + +class MarginalLinearClassifier(MarginalLinearModel, ClassifierMixin): + ... + + +# if __name__ == '__main__': +# X, y = imodels.get_clean_dataset('heart') +# X_train, X_test, y_train, y_test = train_test_split( +# X, y, random_state=42, test_size=0.2) +# m = MarginalLinearModelRegressor() + +# m.fit(X_train, y_train) +# print(m.coef_) +# print(m.predict(X_test)) +# print(m.score(X_test, y_test)) + if __name__ == "__main__": # X, y, feature_names = imodels.get_clean_dataset("heart") X, y, feature_names = imodels.get_clean_dataset( **imodels.util.data_util.DSET_KWARGS["california_housing"] ) + # scale the data + X = StandardScaler().fit_transform(X) + y = StandardScaler().fit_transform(y.reshape(-1, 1)).squeeze() + print("shapes", X.shape, y.shape, "nunique", np.unique(y).size) X_train, X_test, y_train, y_test = train_test_split( X, y, random_state=42, test_size=0.2 @@ -243,28 +314,30 @@ ) results = defaultdict(list) for m in [ - MarginalShrinkageLinearModelRegressor(**kwargs), - MarginalShrinkageLinearModelRegressor(est_marginal_name=None, **kwargs), - MarginalShrinkageLinearModelRegressor( - est_main_name=None, - **kwargs, - ), - MarginalShrinkageLinearModelRegressor( - est_marginal_name="ridge", - est_main_name="ridge", - marginal_sign_constraint=True, - **kwargs, - ), - MarginalShrinkageLinearModelRegressor( - est_marginal_name=None, est_main_name="lasso", **kwargs - ), - MarginalShrinkageLinearModelRegressor( - est_marginal_name="ridge", - est_main_name="lasso", - marginal_sign_constraint=True, - **kwargs, - ), - # RidgeCV(alphas=alphas, fit_intercept=False), + # MarginalShrinkageLinearModelRegressor(**kwargs), + # MarginalShrinkageLinearModelRegressor( + # est_marginal_name=None, **kwargs), + # MarginalShrinkageLinearModelRegressor( + # est_main_name=None, + # **kwargs, + # ), + # MarginalShrinkageLinearModelRegressor( + # est_marginal_name="ridge", + # est_main_name="ridge", + # marginal_sign_constraint=True, + # **kwargs, + # ), + # MarginalShrinkageLinearModelRegressor( + # est_marginal_name=None, est_main_name="lasso", **kwargs + # ), + # 
MarginalShrinkageLinearModelRegressor( + # est_marginal_name="ridge", + # est_main_name="lasso", + # marginal_sign_constraint=True, + # **kwargs, + # ), + MarginalLinearRegressor(alpha=1.0), + RidgeCV(alphas=alphas, fit_intercept=False), ]: results["model_name"].append(str(m)) m.fit(X_train, y_train) @@ -277,11 +350,14 @@ results["test_roc"].append( roc_auc_score(y_test, m.predict_proba(X_test)[:, 1]) ) - results["acc_train"].append(accuracy_score(y_train, m.predict(X_train))) - results["acc_test"].append(accuracy_score(y_test, m.predict(X_test))) + results["acc_train"].append( + accuracy_score(y_train, m.predict(X_train))) + results["acc_test"].append( + accuracy_score(y_test, m.predict(X_test))) else: y_pred = m.predict(X_test) - results["train_mse"].append(np.mean((y_train - m.predict(X_train)) ** 2)) + results["train_mse"].append( + np.mean((y_train - m.predict(X_train)) ** 2)) results["test_mse"].append(np.mean((y_test - y_pred) ** 2)) results["train_r2"].append(m.score(X_train, y_train)) results["test_r2"].append(m.score(X_test, y_test)) @@ -294,10 +370,10 @@ coefs.append(deepcopy(lin.coef_)) print("alpha best", lin.alpha_) - diffs = pd.DataFrame({str(i): coefs[i] for i in range(len(coefs))}) - diffs["diff 0 - 1"] = diffs["0"] - diffs["1"] - diffs["diff 1 - 2"] = diffs["1"] - diffs["2"] - print(diffs) + # diffs = pd.DataFrame({str(i): coefs[i] for i in range(len(coefs))}) + # diffs["diff 0 - 1"] = diffs["0"] - diffs["1"] + # diffs["diff 1 - 2"] = diffs["1"] - diffs["2"] + # print(diffs) # don't round strings with pd.option_context( @@ -315,6 +391,450 @@

Classes

+
+class MarginalLinearClassifier +(alpha=1.0, l1_ratio=0.5, max_iter=1000, random_state=None) +
+
+

Linear model that only fits marginal effects of each feature.

+

Arguments are passed to sklearn.linear_model.ElasticNet

+
+ +Expand source code + +
class MarginalLinearClassifier(MarginalLinearModel, ClassifierMixin):
+    ...
+
+
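As a minimal illustration of the classifier documented here, a fit-only sketch on synthetic data (the import path is an assumption; prediction behaviour follows the base-class code shown further below):

import numpy as np
from imodels.algebraic.marginal_shrinkage_linear_model import MarginalLinearClassifier  # assumed path

rng = np.random.RandomState(0)
X = rng.randn(100, 5)
y = (X[:, 0] + X[:, 1] > 0).astype(int)   # binary 0/1 target

clf = MarginalLinearClassifier(alpha=0.01)
clf.fit(X, y)              # labels are encoded internally via np.unique
print(clf.classes_)        # unique class labels seen during fit
print(clf.coef_)           # one marginal coefficient per feature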

Ancestors

+
    +
  • MarginalLinearModel
  • +
  • sklearn.base.BaseEstimator
  • +
  • sklearn.utils._metadata_requests._MetadataRequester
  • +
  • sklearn.base.ClassifierMixin
  • +
+

Methods

+
+
+def set_score_request(self: MarginalLinearClassifier, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> MarginalLinearClassifier +
+
+

Request metadata passed to the score method.

+

Note that this method is only relevant if +enable_metadata_routing=True (see :func:sklearn.set_config). +Please see :ref:User Guide <metadata_routing> on how the routing +mechanism works.

+

The options for each parameter are:

+
    +
  • +

    True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.

    +
  • +
  • +

    False: metadata is not requested and the meta-estimator will not pass it to score.

    +
  • +
  • +

    None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

    +
  • +
  • +

    str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

    +
  • +
+

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the +existing request. This allows you to change the request for some +parameters and not others.

+
+

Added in version: 1.3

+
+
+

Note

+

This method is only relevant if this estimator is used as a +sub-estimator of a meta-estimator, e.g. used inside a +:class:pipeline.Pipeline. Otherwise it has no effect.

+
+

Parameters

+
+
sample_weight : str, True, False, or None, +default=sklearn.utils.metadata_routing.UNCHANGED
+
Metadata routing for sample_weight parameter in score.
+
+

Returns

+
+
self : object
+
The updated object.
+
+
+ +Expand source code + +
def func(**kw):
+    """Updates the request for provided parameters
+
+    This docstring is overwritten below.
+    See REQUESTER_DOC for expected functionality
+    """
+    if not _routing_enabled():
+        raise RuntimeError(
+            "This method is only available when metadata routing is enabled."
+            " You can enable it using"
+            " sklearn.set_config(enable_metadata_routing=True)."
+        )
+
+    if self.validate_keys and (set(kw) - set(self.keys)):
+        raise TypeError(
+            f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
+            f" are: {set(self.keys)}"
+        )
+
+    requests = instance._get_metadata_request()
+    method_metadata_request = getattr(requests, self.name)
+
+    for prop, alias in kw.items():
+        if alias is not UNCHANGED:
+            method_metadata_request.add_request(param=prop, alias=alias)
+    instance._metadata_request = requests
+
+    return instance
+
+
+
+

Inherited members

+ +
+
+class MarginalLinearModel +(alpha=1.0, l1_ratio=0.5, max_iter=1000, random_state=None) +
+
+

Linear model that only fits marginal effects of each feature.

+

Arguments are passed to sklearn.linear_model.ElasticNet

+
+ +Expand source code + +
class MarginalLinearModel(BaseEstimator):
+    """Linear model that only fits marginal effects of each feature.
+    """
+
+    def __init__(self, alpha=1.0, l1_ratio=0.5, max_iter=1000, random_state=None):
+        '''Arguments are passed to sklearn.linear_model.ElasticNet
+        '''
+        self.alpha = alpha
+        self.l1_ratio = l1_ratio
+        self.max_iter = max_iter
+        self.random_state = random_state
+
+    def fit(self, X, y, sample_weight=None):
+        # checks
+        X, y = check_X_y(X, y, accept_sparse=False, multi_output=False)
+        sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+        if isinstance(self, ClassifierMixin):
+            check_classification_targets(y)
+            self.classes_, y = np.unique(y, return_inverse=True)
+
+        # fit marginal estimator to each feature
+        coef_marginal_ = []
+        for i in range(X.shape[1]):
+            est_marginal = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio,
+                                      max_iter=self.max_iter, random_state=self.random_state)
+            est_marginal.fit(X[:, i].reshape(-1, 1), y,
+                             sample_weight=sample_weight)
+            coef_marginal_.append(deepcopy(est_marginal.coef_))
+        coef_marginal_ = np.vstack(coef_marginal_).squeeze()
+
+        self.coef_ = coef_marginal_ / X.shape[1]
+        self.alpha_ = self.alpha
+
+        return self
+
+    def predict_proba(self, X):
+        X = check_array(X, accept_sparse=False, dtype=None)
+        return X @ self.coef_
+
+    def predict(self, X):
+        probs = self.predict_proba(X)
+        if isinstance(self, ClassifierMixin):
+            return np.argmax(probs, axis=1)
+        else:
+            return probs
+
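The source above shows that each coefficient comes from a single-feature ElasticNet fit, averaged over the number of features. A minimal regression usage sketch, reusing the dataset and scaling from the module's __main__ block (the import path is an assumption):

import imodels
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from imodels.algebraic.marginal_shrinkage_linear_model import MarginalLinearRegressor  # assumed path

X, y, feature_names = imodels.get_clean_dataset(
    **imodels.util.data_util.DSET_KWARGS["california_housing"])
X = StandardScaler().fit_transform(X)
y = StandardScaler().fit_transform(y.reshape(-1, 1)).squeeze()
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2)

m = MarginalLinearRegressor(alpha=1.0)
m.fit(X_train, y_train)
print(m.coef_)                  # one ElasticNet coefficient per feature, divided by n_features
print(m.score(X_test, y_test))  # R^2 via RegressorMixin.score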
+

Ancestors

+
    +
  • sklearn.base.BaseEstimator
  • +
  • sklearn.utils._metadata_requests._MetadataRequester
  • +
+

Subclasses

+ +

Methods

+
+
+def fit(self, X, y, sample_weight=None) +
+
+
+
+ +Expand source code + +
def fit(self, X, y, sample_weight=None):
+    # checks
+    X, y = check_X_y(X, y, accept_sparse=False, multi_output=False)
+    sample_weight = _check_sample_weight(sample_weight, X, dtype=None)
+    if isinstance(self, ClassifierMixin):
+        check_classification_targets(y)
+        self.classes_, y = np.unique(y, return_inverse=True)
+
+    # fit marginal estimator to each feature
+    coef_marginal_ = []
+    for i in range(X.shape[1]):
+        est_marginal = ElasticNet(alpha=self.alpha, l1_ratio=self.l1_ratio,
+                                  max_iter=self.max_iter, random_state=self.random_state)
+        est_marginal.fit(X[:, i].reshape(-1, 1), y,
+                         sample_weight=sample_weight)
+        coef_marginal_.append(deepcopy(est_marginal.coef_))
+    coef_marginal_ = np.vstack(coef_marginal_).squeeze()
+
+    self.coef_ = coef_marginal_ / X.shape[1]
+    self.alpha_ = self.alpha
+
+    return self
+
+
+
+def predict(self, X) +
+
+
+
+ +Expand source code + +
def predict(self, X):
+    probs = self.predict_proba(X)
+    if isinstance(self, ClassifierMixin):
+        return np.argmax(probs, axis=1)
+    else:
+        return probs
+
+
+
+def predict_proba(self, X) +
+
+
+
+ +Expand source code + +
def predict_proba(self, X):
+    X = check_array(X, accept_sparse=False, dtype=None)
+    return X @ self.coef_
+
+
+
+def set_fit_request(self: MarginalLinearModel, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> MarginalLinearModel +
+
+

Request metadata passed to the fit method.

+

Note that this method is only relevant if +enable_metadata_routing=True (see :func:sklearn.set_config). +Please see :ref:User Guide <metadata_routing> on how the routing +mechanism works.

+

The options for each parameter are:

+
    +
  • +

    True: metadata is requested, and passed to fit if provided. The request is ignored if metadata is not provided.

    +
  • +
  • +

    False: metadata is not requested and the meta-estimator will not pass it to fit.

    +
  • +
  • +

    None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

    +
  • +
  • +

    str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

    +
  • +
+

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the +existing request. This allows you to change the request for some +parameters and not others.

+
+

Added in version: 1.3

+
+
+

Note

+

This method is only relevant if this estimator is used as a +sub-estimator of a meta-estimator, e.g. used inside a +:class:pipeline.Pipeline. Otherwise it has no effect.

+
+

Parameters

+
+
sample_weight : str, True, False, or None, +default=sklearn.utils.metadata_routing.UNCHANGED
+
Metadata routing for sample_weight parameter in fit.
+
+

Returns

+
+
self : object
+
The updated object.
+
+
+ +Expand source code + +
def func(**kw):
+    """Updates the request for provided parameters
+
+    This docstring is overwritten below.
+    See REQUESTER_DOC for expected functionality
+    """
+    if not _routing_enabled():
+        raise RuntimeError(
+            "This method is only available when metadata routing is enabled."
+            " You can enable it using"
+            " sklearn.set_config(enable_metadata_routing=True)."
+        )
+
+    if self.validate_keys and (set(kw) - set(self.keys)):
+        raise TypeError(
+            f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
+            f" are: {set(self.keys)}"
+        )
+
+    requests = instance._get_metadata_request()
+    method_metadata_request = getattr(requests, self.name)
+
+    for prop, alias in kw.items():
+        if alias is not UNCHANGED:
+            method_metadata_request.add_request(param=prop, alias=alias)
+    instance._metadata_request = requests
+
+    return instance
+
+
+
+
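The set_fit_request / set_score_request entries above come from scikit-learn's metadata-routing machinery (scikit-learn >= 1.3). A hedged sketch of the usual pattern; the request only matters when the estimator sits inside a meta-estimator such as a Pipeline or GridSearchCV:

import numpy as np
import sklearn
from imodels.algebraic.marginal_shrinkage_linear_model import MarginalLinearRegressor  # assumed path

sklearn.set_config(enable_metadata_routing=True)   # routing is off by default

rng = np.random.RandomState(0)
X = rng.randn(50, 3)
y = X @ np.array([1.0, 0.5, 0.0])
w = np.ones(len(y))

# ask wrapping meta-estimators to forward sample_weight on to fit();
# a direct fit call is unaffected by the request itself
est = MarginalLinearRegressor(alpha=0.1).set_fit_request(sample_weight=True)
est.fit(X, y, sample_weight=w)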
+
+class MarginalLinearRegressor +(alpha=1.0, l1_ratio=0.5, max_iter=1000, random_state=None) +
+
+

Linear model that only fits marginal effects of each feature.

+

Arguments are passed to sklearn.linear_model.ElasticNet

+
+ +Expand source code + +
class MarginalLinearRegressor(MarginalLinearModel, RegressorMixin):
+    ...
+
+

Ancestors

+
    +
  • MarginalLinearModel
  • +
  • sklearn.base.BaseEstimator
  • +
  • sklearn.utils._metadata_requests._MetadataRequester
  • +
  • sklearn.base.RegressorMixin
  • +
+

Methods

+
+
+def set_score_request(self: MarginalLinearRegressor, *, sample_weight: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> MarginalLinearRegressor +
+
+

Request metadata passed to the score method.

+

Note that this method is only relevant if +enable_metadata_routing=True (see :func:sklearn.set_config). +Please see :ref:User Guide <metadata_routing> on how the routing +mechanism works.

+

The options for each parameter are:

+
    +
  • +

    True: metadata is requested, and passed to score if provided. The request is ignored if metadata is not provided.

    +
  • +
  • +

    False: metadata is not requested and the meta-estimator will not pass it to score.

    +
  • +
  • +

    None: metadata is not requested, and the meta-estimator will raise an error if the user provides it.

    +
  • +
  • +

    str: metadata should be passed to the meta-estimator with this given alias instead of the original name.

    +
  • +
+

The default (sklearn.utils.metadata_routing.UNCHANGED) retains the +existing request. This allows you to change the request for some +parameters and not others.

+
+

Added in version: 1.3

+
+
+

Note

+

This method is only relevant if this estimator is used as a +sub-estimator of a meta-estimator, e.g. used inside a +:class:pipeline.Pipeline. Otherwise it has no effect.

+
+

Parameters

+
+
sample_weight : str, True, False, or None, +default=sklearn.utils.metadata_routing.UNCHANGED
+
Metadata routing for sample_weight parameter in score.
+
+

Returns

+
+
self : object
+
The updated object.
+
+
+ +Expand source code + +
def func(**kw):
+    """Updates the request for provided parameters
+
+    This docstring is overwritten below.
+    See REQUESTER_DOC for expected functionality
+    """
+    if not _routing_enabled():
+        raise RuntimeError(
+            "This method is only available when metadata routing is enabled."
+            " You can enable it using"
+            " sklearn.set_config(enable_metadata_routing=True)."
+        )
+
+    if self.validate_keys and (set(kw) - set(self.keys)):
+        raise TypeError(
+            f"Unexpected args: {set(kw) - set(self.keys)}. Accepted arguments"
+            f" are: {set(self.keys)}"
+        )
+
+    requests = instance._get_metadata_request()
+    method_metadata_request = getattr(requests, self.name)
+
+    for prop, alias in kw.items():
+        if alias is not UNCHANGED:
+            method_metadata_request.add_request(param=prop, alias=alias)
+    instance._metadata_request = requests
+
+    return instance
+
+
+
+

Inherited members

+ +
class MarginalShrinkageLinearModel (est_marginal_name='ridge', est_main_name='ridge', marginal_divide_by_d=True, marginal_sign_constraint=False, alphas=[0.001, 0.01, 0.1, 1.0, 10.0, 100.0, 1000.0, 10000.0, 100000.0], elasticnet_ratio=0.5, random_state=None) @@ -417,7 +937,8 @@

Params

self.coef_marginal_ = self._fit_marginal(X, y, sample_weight) # fit main - self.est_main_ = self._fit_main(X, y, sample_weight, self.coef_marginal_) + self.est_main_ = self._fit_main( + X, y, sample_weight, self.coef_marginal_) return self @@ -436,7 +957,8 @@

Params

else: coef_marginal_ = [] for i in range(X.shape[1]): - est_marginal.fit(X[:, i].reshape(-1, 1), y, sample_weight=sample_weight) + est_marginal.fit(X[:, i].reshape(-1, 1), y, + sample_weight=sample_weight) coef_marginal_.append(deepcopy(est_marginal.coef_)) coef_marginal_ = np.vstack(coef_marginal_).squeeze() @@ -553,7 +1075,8 @@

Methods

self.coef_marginal_ = self._fit_marginal(X, y, sample_weight) # fit main - self.est_main_ = self._fit_main(X, y, sample_weight, self.coef_marginal_) + self.est_main_ = self._fit_main( + X, y, sample_weight, self.coef_marginal_) return self
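A short usage sketch for the shrinkage estimator documented above, mirroring one of the configurations listed in the module's (now commented-out) __main__ block; the import path is an assumption:

import imodels
from sklearn.model_selection import train_test_split
from imodels.algebraic.marginal_shrinkage_linear_model import (
    MarginalShrinkageLinearModelRegressor,  # assumed path
)

X, y, feature_names = imodels.get_clean_dataset(
    **imodels.util.data_util.DSET_KWARGS["california_housing"])
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2)

# single-feature ("marginal") ridge fits, then a lasso main model that is
# tied to the marginal fits through the sign constraint
m = MarginalShrinkageLinearModelRegressor(
    est_marginal_name="ridge",
    est_main_name="lasso",
    marginal_sign_constraint=True,
    random_state=42,
)
m.fit(X_train, y_train)
print(m.score(X_test, y_test))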
@@ -820,6 +1343,27 @@

Index 🔍

  • Classes

    • +

      MarginalLinearClassifier

      + +
    • +
    • +

      MarginalLinearModel

      + +
    • +
    • +

      MarginalLinearRegressor

      + +
    • +
    • MarginalShrinkageLinearModel

      • fit
      • diff --git a/docs/algebraic/tree_gam.html b/docs/algebraic/tree_gam.html index 8a423683..960e2a76 100644 --- a/docs/algebraic/tree_gam.html +++ b/docs/algebraic/tree_gam.html @@ -25,7 +25,7 @@ import numpy as np import pandas as pd from sklearn.base import BaseEstimator -from sklearn.linear_model import LinearRegression, RidgeCV +from sklearn.linear_model import ElasticNetCV, LinearRegression, RidgeCV from sklearn.tree import DecisionTreeRegressor from sklearn.utils.validation import check_is_fitted from sklearn.utils import check_array @@ -62,6 +62,7 @@ fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, + fit_posthoc_tree_coefs=None, boosting_strategy="cyclic", validation_frac=0.15, random_state=None, @@ -96,6 +97,8 @@ 1 means no decay, 0 means only use marginal effects shape = (1 - decay_rate_towards_marginal) * shape + decay_rate_towards_marginal * marginal_shape The way this is implemented is by keeping track of how many times to multiply decay_rate_towards_marginal for each cyclic estimator + fit_posthoc_tree_coefs: str [None, "ridge"] + Whether to fit a linear model to the tree coefficients after fitting the cyclic boosting. boosting_strategy : str ["cyclic", "greedy"] Whether to use cyclic boosting (cycle over features) or greedy boosting (select best feature at each step) validation_frac: float @@ -113,6 +116,7 @@ self.fit_linear_marginal = fit_linear_marginal self.select_linear_marginal = select_linear_marginal self.decay_rate_towards_marginal = decay_rate_towards_marginal + self.fit_posthoc_tree_coefs = fit_posthoc_tree_coefs self.boosting_strategy = boosting_strategy self.validation_frac = validation_frac self.random_state = random_state @@ -139,6 +143,7 @@ sample_weight, test_size=self.validation_frac, random_state=self.random_state, + stratify=y if isinstance(self, ClassifierMixin) else None, ) self.estimators_marginal = [] @@ -162,6 +167,9 @@ sample_weight_val, ) + if self.fit_posthoc_tree_coefs is not None: + self._fit_posthoc_tree_coefs(X_train, y_train, sample_weight_train) + self.mse_val_ = self._calc_mse(X_val, y_val, sample_weight_val) return self @@ -186,7 +194,8 @@ ) est.fit(X_, residuals_train, sample_weight=sample_weight_train) if self.reg_param_marginal > 0: - est = imodels.HSTreeRegressor(est, reg_param=self.reg_param_marginal) + est = imodels.HSTreeRegressor( + est, reg_param=self.reg_param_marginal) self.estimators_marginal.append(est) if ( @@ -196,9 +205,11 @@ if self.fit_linear_marginal.lower() == "ridge": linear_marginal = RidgeCV(fit_intercept=False) elif self.fit_linear_marginal == "NNLS": - linear_marginal = LinearRegression(fit_intercept=False, positive=True) + linear_marginal = LinearRegression( + fit_intercept=False, positive=True) linear_marginal.fit( - np.array([est.predict(X_train) for est in self.estimators_marginal]).T, + np.array([est.predict(X_train) + for est in self.estimators_marginal]).T, residuals_train, sample_weight_train, ) @@ -235,12 +246,14 @@ ) est.fit(X_, residuals_train, sample_weight=sample_weight_train) succesfully_split_on_feature = np.all( - (est.tree_.feature[0] == feature_num) | (est.tree_.feature[0] == -2) + (est.tree_.feature[0] == feature_num) | ( + est.tree_.feature[0] == -2) ) if not succesfully_split_on_feature: continue if self.reg_param > 0: - est = imodels.HSTreeRegressor(est, reg_param=self.reg_param) + est = imodels.HSTreeRegressor( + est, reg_param=self.reg_param) self.estimators_.append(est) residuals_train_new = ( residuals_train - self.learning_rate * 
est.predict(X_train) @@ -252,20 +265,23 @@ X_train, y_train, sample_weight_train ) # don't add each estimator for greedy - boosting_round_ests.append(deepcopy(self.estimators_.pop())) + boosting_round_ests.append( + deepcopy(self.estimators_.pop())) boosting_round_mses.append(mse_train_new) if self.boosting_strategy == "greedy": best_est = boosting_round_ests[np.argmin(boosting_round_mses)] self.estimators_.append(best_est) residuals_train = ( - residuals_train - self.learning_rate * best_est.predict(X_train) + residuals_train - self.learning_rate * + best_est.predict(X_train) ) # decay marginal effects if self.decay_rate_towards_marginal < 1.0: new_decay_coefs = [self.decay_rate_towards_marginal] * ( - len(self.estimators_) - len(self.decay_coef_towards_marginal_) + len(self.estimators_) - + len(self.decay_coef_towards_marginal_) ) # print(self.decay_coef_towards_marginal_) # print('new_decay_coefs', new_decay_coefs) @@ -283,6 +299,25 @@ else: mse_val = mse_val_new + def _fit_posthoc_tree_coefs(self, X, y, sample_weight=None): + # extract predictions from each tree + X_pred_tree = np.array([est.predict(X) for est in self.estimators_]).T + print('shapes', X.shape, X_pred_tree.shape, + y.shape, len(self.estimators_)) + + coef_prior = np.ones(len(self.estimators_)) * self.learning_rate + y = y - self.bias_ - X_pred_tree @ coef_prior + + if self.fit_posthoc_tree_coefs.lower() == "ridge": + m = RidgeCV(fit_intercept=False) + elif self.fit_posthoc_tree_coefs.lower() == "nnls": + m = LinearRegression(fit_intercept=False, positive=True) + elif self.fit_posthoc_tree_coefs.lower() == "elasticnet": + m = ElasticNetCV(fit_intercept=False, positive=True) + + m.fit(X_pred_tree, y, sample_weight=sample_weight) + self.cyclic_coef_ = m.coef_ + coef_prior + def predict_proba(self, X, marginal_only=False): """ Params @@ -293,22 +328,33 @@ X = check_array(X, accept_sparse=False, dtype=None) check_is_fitted(self) probs1 = np.ones(X.shape[0]) * self.bias_ + + # marginal prediction for i, est in enumerate(self.estimators_marginal): probs1 += est.predict(X) * self.marginal_coef_[i] + + # cyclic coefs prediction if not marginal_only: + if not hasattr(self, "cyclic_coef_"): + cyclic_coef_ = np.ones( + len(self.estimators_)) * self.learning_rate + else: + cyclic_coef_ = self.cyclic_coef_ + # print('coef', cyclic_coef_) + if self.decay_rate_towards_marginal < 1.0: for i, est in enumerate(self.estimators_): if i < len(self.decay_coef_towards_marginal_): probs1 += ( - self.learning_rate + cyclic_coef_[i] * self.decay_coef_towards_marginal_[i] * est.predict(X) ) else: - probs1 += self.learning_rate * est.predict(X) + probs1 += cyclic_coef_[i] * est.predict(X) else: - for est in self.estimators_: - probs1 += self.learning_rate * est.predict(X) + for i, est in enumerate(self.estimators_): + probs1 += cyclic_coef_[i] * est.predict(X) probs1 = np.clip(probs1, a_min=0, a_max=1) return np.array([1 - probs1, probs1]).T @@ -340,32 +386,35 @@ boosting_strategy="cyclic", random_state=42, learning_rate=0.1, - max_leaf_nodes=2, - select_linear_marginal=True, - fit_linear_marginal="NNLS", - n_boosting_rounds_marginal=3, - decay_rate_towards_marginal=0, - n_boosting_rounds=10, + max_leaf_nodes=3, + # select_linear_marginal=True, + # fit_linear_marginal="NNLS", + # n_boosting_rounds_marginal=3, + # decay_rate_towards_marginal=0, + fit_posthoc_tree_coefs="elasticnet", + n_boosting_rounds=100, ) gam.fit(X, y_train) # check roc auc score y_pred = gam.predict_proba(X_test)[:, 1] - print( - "train roc:", - roc_auc_score(y_train, 
gam.predict_proba(X)[:, 1]).round(3), - ) + # print( + # "train roc:", + # roc_auc_score(y_train, gam.predict_proba(X)[:, 1]).round(3), + # ) print("test roc:", roc_auc_score(y_test, y_pred).round(3)) - print( - "accs", - accuracy_score(y_train, gam.predict(X)).round(3), - accuracy_score(y_test, gam.predict(X_test)).round(3), - "imb", - np.mean(y_train).round(3), - np.mean(y_test).round(3), - ) - - # print(gam.estimators_) + print("test acc:", accuracy_score(y_test, gam.predict(X_test)).round(3)) + print('\t(imb:', np.mean(y_test).round(3), ')') + # print( + # "accs", + # accuracy_score(y_train, gam.predict(X)).round(3), + # accuracy_score(y_test, gam.predict(X_test)).round(3), + # "imb", + # np.mean(y_train).round(3), + # np.mean(y_test).round(3), + # ) + + # # print(gam.estimators_)
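The new fit_posthoc_tree_coefs option exercised above refits one coefficient per boosted tree after cyclic boosting, starting from the uniform learning-rate prior. A standalone sketch of that idea, mirroring _fit_posthoc_tree_coefs with the "ridge" option (the function name and arguments here are illustrative):

import numpy as np
from sklearn.linear_model import RidgeCV

def refit_tree_coefs(estimators, X, y, bias, learning_rate):
    # one prediction column per boosted tree
    preds = np.array([est.predict(X) for est in estimators]).T
    # every tree starts out with the shared learning-rate coefficient
    coef_prior = np.full(len(estimators), learning_rate)
    # refit a penalized linear model on the residual the prior leaves behind
    residual = y - bias - preds @ coef_prior
    m = RidgeCV(fit_intercept=False).fit(preds, residual)
    # the refit coefficients replace the single shared learning rate
    return m.coef_ + coef_prior

The "nnls" and "elasticnet" options swap the ridge solver for a positivity-constrained LinearRegression or ElasticNetCV, as in the method's source.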
  • @@ -379,7 +428,7 @@

    Classes

    class TreeGAM -(n_boosting_rounds=100, max_leaf_nodes=3, reg_param=0.0, learning_rate: float = 0.01, n_boosting_rounds_marginal=0, max_leaf_nodes_marginal=2, reg_param_marginal=0.0, fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, boosting_strategy='cyclic', validation_frac=0.15, random_state=None) +(n_boosting_rounds=100, max_leaf_nodes=3, reg_param=0.0, learning_rate: float = 0.01, n_boosting_rounds_marginal=0, max_leaf_nodes_marginal=2, reg_param_marginal=0.0, fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, fit_posthoc_tree_coefs=None, boosting_strategy='cyclic', validation_frac=0.15, random_state=None)

    Tree-based GAM classifier. @@ -415,6 +464,8 @@

    Params

    1 means no decay, 0 means only use marginal effects shape = (1 - decay_rate_towards_marginal) * shape + decay_rate_towards_marginal * marginal_shape The way this is implemented is by keeping track of how many times to multiply decay_rate_towards_marginal for each cyclic estimator +fit_posthoc_tree_coefs: str [None, "ridge"] +Whether to fit a linear model to the tree coefficients after fitting the cyclic boosting. boosting_strategy : str ["cyclic", "greedy"] Whether to use cyclic boosting (cycle over features) or greedy boosting (select best feature at each step) validation_frac: float @@ -445,6 +496,7 @@

    Params

    fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, + fit_posthoc_tree_coefs=None, boosting_strategy="cyclic", validation_frac=0.15, random_state=None, @@ -479,6 +531,8 @@

    Params

    1 means no decay, 0 means only use marginal effects shape = (1 - decay_rate_towards_marginal) * shape + decay_rate_towards_marginal * marginal_shape The way this is implemented is by keeping track of how many times to multiply decay_rate_towards_marginal for each cyclic estimator + fit_posthoc_tree_coefs: str [None, "ridge"] + Whether to fit a linear model to the tree coefficients after fitting the cyclic boosting. boosting_strategy : str ["cyclic", "greedy"] Whether to use cyclic boosting (cycle over features) or greedy boosting (select best feature at each step) validation_frac: float @@ -496,6 +550,7 @@

    Params

    self.fit_linear_marginal = fit_linear_marginal self.select_linear_marginal = select_linear_marginal self.decay_rate_towards_marginal = decay_rate_towards_marginal + self.fit_posthoc_tree_coefs = fit_posthoc_tree_coefs self.boosting_strategy = boosting_strategy self.validation_frac = validation_frac self.random_state = random_state @@ -522,6 +577,7 @@

    Params

    sample_weight, test_size=self.validation_frac, random_state=self.random_state, + stratify=y if isinstance(self, ClassifierMixin) else None, ) self.estimators_marginal = [] @@ -545,6 +601,9 @@

    Params

    sample_weight_val, ) + if self.fit_posthoc_tree_coefs is not None: + self._fit_posthoc_tree_coefs(X_train, y_train, sample_weight_train) + self.mse_val_ = self._calc_mse(X_val, y_val, sample_weight_val) return self @@ -569,7 +628,8 @@

    Params

    ) est.fit(X_, residuals_train, sample_weight=sample_weight_train) if self.reg_param_marginal > 0: - est = imodels.HSTreeRegressor(est, reg_param=self.reg_param_marginal) + est = imodels.HSTreeRegressor( + est, reg_param=self.reg_param_marginal) self.estimators_marginal.append(est) if ( @@ -579,9 +639,11 @@

    Params

    if self.fit_linear_marginal.lower() == "ridge": linear_marginal = RidgeCV(fit_intercept=False) elif self.fit_linear_marginal == "NNLS": - linear_marginal = LinearRegression(fit_intercept=False, positive=True) + linear_marginal = LinearRegression( + fit_intercept=False, positive=True) linear_marginal.fit( - np.array([est.predict(X_train) for est in self.estimators_marginal]).T, + np.array([est.predict(X_train) + for est in self.estimators_marginal]).T, residuals_train, sample_weight_train, ) @@ -618,12 +680,14 @@

    Params

    ) est.fit(X_, residuals_train, sample_weight=sample_weight_train) succesfully_split_on_feature = np.all( - (est.tree_.feature[0] == feature_num) | (est.tree_.feature[0] == -2) + (est.tree_.feature[0] == feature_num) | ( + est.tree_.feature[0] == -2) ) if not succesfully_split_on_feature: continue if self.reg_param > 0: - est = imodels.HSTreeRegressor(est, reg_param=self.reg_param) + est = imodels.HSTreeRegressor( + est, reg_param=self.reg_param) self.estimators_.append(est) residuals_train_new = ( residuals_train - self.learning_rate * est.predict(X_train) @@ -635,20 +699,23 @@

    Params

    X_train, y_train, sample_weight_train ) # don't add each estimator for greedy - boosting_round_ests.append(deepcopy(self.estimators_.pop())) + boosting_round_ests.append( + deepcopy(self.estimators_.pop())) boosting_round_mses.append(mse_train_new) if self.boosting_strategy == "greedy": best_est = boosting_round_ests[np.argmin(boosting_round_mses)] self.estimators_.append(best_est) residuals_train = ( - residuals_train - self.learning_rate * best_est.predict(X_train) + residuals_train - self.learning_rate * + best_est.predict(X_train) ) # decay marginal effects if self.decay_rate_towards_marginal < 1.0: new_decay_coefs = [self.decay_rate_towards_marginal] * ( - len(self.estimators_) - len(self.decay_coef_towards_marginal_) + len(self.estimators_) - + len(self.decay_coef_towards_marginal_) ) # print(self.decay_coef_towards_marginal_) # print('new_decay_coefs', new_decay_coefs) @@ -666,6 +733,25 @@

    Params

    else: mse_val = mse_val_new + def _fit_posthoc_tree_coefs(self, X, y, sample_weight=None): + # extract predictions from each tree + X_pred_tree = np.array([est.predict(X) for est in self.estimators_]).T + print('shapes', X.shape, X_pred_tree.shape, + y.shape, len(self.estimators_)) + + coef_prior = np.ones(len(self.estimators_)) * self.learning_rate + y = y - self.bias_ - X_pred_tree @ coef_prior + + if self.fit_posthoc_tree_coefs.lower() == "ridge": + m = RidgeCV(fit_intercept=False) + elif self.fit_posthoc_tree_coefs.lower() == "nnls": + m = LinearRegression(fit_intercept=False, positive=True) + elif self.fit_posthoc_tree_coefs.lower() == "elasticnet": + m = ElasticNetCV(fit_intercept=False, positive=True) + + m.fit(X_pred_tree, y, sample_weight=sample_weight) + self.cyclic_coef_ = m.coef_ + coef_prior + def predict_proba(self, X, marginal_only=False): """ Params @@ -676,22 +762,33 @@

    Params

    X = check_array(X, accept_sparse=False, dtype=None) check_is_fitted(self) probs1 = np.ones(X.shape[0]) * self.bias_ + + # marginal prediction for i, est in enumerate(self.estimators_marginal): probs1 += est.predict(X) * self.marginal_coef_[i] + + # cyclic coefs prediction if not marginal_only: + if not hasattr(self, "cyclic_coef_"): + cyclic_coef_ = np.ones( + len(self.estimators_)) * self.learning_rate + else: + cyclic_coef_ = self.cyclic_coef_ + # print('coef', cyclic_coef_) + if self.decay_rate_towards_marginal < 1.0: for i, est in enumerate(self.estimators_): if i < len(self.decay_coef_towards_marginal_): probs1 += ( - self.learning_rate + cyclic_coef_[i] * self.decay_coef_towards_marginal_[i] * est.predict(X) ) else: - probs1 += self.learning_rate * est.predict(X) + probs1 += cyclic_coef_[i] * est.predict(X) else: - for est in self.estimators_: - probs1 += self.learning_rate * est.predict(X) + for i, est in enumerate(self.estimators_): + probs1 += cyclic_coef_[i] * est.predict(X) probs1 = np.clip(probs1, a_min=0, a_max=1) return np.array([1 - probs1, probs1]).T @@ -750,6 +847,7 @@

    Methods

    sample_weight, test_size=self.validation_frac, random_state=self.random_state, + stratify=y if isinstance(self, ClassifierMixin) else None, ) self.estimators_marginal = [] @@ -773,6 +871,9 @@

    Methods

    sample_weight_val, ) + if self.fit_posthoc_tree_coefs is not None: + self._fit_posthoc_tree_coefs(X_train, y_train, sample_weight_train) + self.mse_val_ = self._calc_mse(X_val, y_val, sample_weight_val) return self @@ -815,22 +916,33 @@

    Methods

    X = check_array(X, accept_sparse=False, dtype=None) check_is_fitted(self) probs1 = np.ones(X.shape[0]) * self.bias_ + + # marginal prediction for i, est in enumerate(self.estimators_marginal): probs1 += est.predict(X) * self.marginal_coef_[i] + + # cyclic coefs prediction if not marginal_only: + if not hasattr(self, "cyclic_coef_"): + cyclic_coef_ = np.ones( + len(self.estimators_)) * self.learning_rate + else: + cyclic_coef_ = self.cyclic_coef_ + # print('coef', cyclic_coef_) + if self.decay_rate_towards_marginal < 1.0: for i, est in enumerate(self.estimators_): if i < len(self.decay_coef_towards_marginal_): probs1 += ( - self.learning_rate + cyclic_coef_[i] * self.decay_coef_towards_marginal_[i] * est.predict(X) ) else: - probs1 += self.learning_rate * est.predict(X) + probs1 += cyclic_coef_[i] * est.predict(X) else: - for est in self.estimators_: - probs1 += self.learning_rate * est.predict(X) + for i, est in enumerate(self.estimators_): + probs1 += cyclic_coef_[i] * est.predict(X) probs1 = np.clip(probs1, a_min=0, a_max=1) return np.array([1 - probs1, probs1]).T @@ -1082,7 +1194,7 @@

    Returns

    class TreeGAMClassifier -(n_boosting_rounds=100, max_leaf_nodes=3, reg_param=0.0, learning_rate: float = 0.01, n_boosting_rounds_marginal=0, max_leaf_nodes_marginal=2, reg_param_marginal=0.0, fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, boosting_strategy='cyclic', validation_frac=0.15, random_state=None) +(n_boosting_rounds=100, max_leaf_nodes=3, reg_param=0.0, learning_rate: float = 0.01, n_boosting_rounds_marginal=0, max_leaf_nodes_marginal=2, reg_param_marginal=0.0, fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, fit_posthoc_tree_coefs=None, boosting_strategy='cyclic', validation_frac=0.15, random_state=None)

    Tree-based GAM classifier. @@ -1118,6 +1230,8 @@

    Params

    1 means no decay, 0 means only use marginal effects shape = (1 - decay_rate_towards_marginal) * shape + decay_rate_towards_marginal * marginal_shape The way this is implemented is by keeping track of how many times to multiply decay_rate_towards_marginal for each cyclic estimator +fit_posthoc_tree_coefs: str [None, "ridge"] +Whether to fit a linear model to the tree coefficients after fitting the cyclic boosting. boosting_strategy : str ["cyclic", "greedy"] Whether to use cyclic boosting (cycle over features) or greedy boosting (select best feature at each step) validation_frac: float @@ -1236,7 +1350,7 @@

    Inherited members

    class TreeGAMRegressor -(n_boosting_rounds=100, max_leaf_nodes=3, reg_param=0.0, learning_rate: float = 0.01, n_boosting_rounds_marginal=0, max_leaf_nodes_marginal=2, reg_param_marginal=0.0, fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, boosting_strategy='cyclic', validation_frac=0.15, random_state=None) +(n_boosting_rounds=100, max_leaf_nodes=3, reg_param=0.0, learning_rate: float = 0.01, n_boosting_rounds_marginal=0, max_leaf_nodes_marginal=2, reg_param_marginal=0.0, fit_linear_marginal=None, select_linear_marginal=False, decay_rate_towards_marginal=1.0, fit_posthoc_tree_coefs=None, boosting_strategy='cyclic', validation_frac=0.15, random_state=None)

    Tree-based GAM classifier. @@ -1272,6 +1386,8 @@

    Params

    1 means no decay, 0 means only use marginal effects shape = (1 - decay_rate_towards_marginal) * shape + decay_rate_towards_marginal * marginal_shape The way this is implemented is by keeping track of how many times to multiply decay_rate_towards_marginal for each cyclic estimator +fit_posthoc_tree_coefs: str [None, "ridge"] +Whether to fit a linear model to the tree coefficients after fitting the cyclic boosting. boosting_strategy : str ["cyclic", "greedy"] Whether to use cyclic boosting (cycle over features) or greedy boosting (select best feature at each step) validation_frac: float diff --git a/docs/index.html b/docs/index.html index bbb8576a..bd4fb775 100644 --- a/docs/index.html +++ b/docs/index.html @@ -394,7 +394,7 @@

    Support for different tasks

    AutoML model AutoInterpretableClassifier️ - +AutoInterpretableRegressor️ diff --git a/docs/util/automl.html b/docs/util/automl.html index 9ac34fff..d462d000 100644 --- a/docs/util/automl.html +++ b/docs/util/automl.html @@ -28,33 +28,78 @@ TreeGAMClassifier, FIGSClassifier, HSTreeClassifier, + RuleFitRegressor, + TreeGAMRegressor, + FIGSRegressor, + HSTreeRegressor, ) -from sklearn.tree import DecisionTreeClassifier -from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.linear_model import LogisticRegression, ElasticNet, Ridge import imodels from sklearn.model_selection import GridSearchCV, train_test_split import numpy as np from sklearn.pipeline import Pipeline +# PARAM_GRID_LINEAR = [ +# { +# "est": [] +# } +# ] -class AutoInterpretableClassifier(BaseEstimator, ClassifierMixin): + +class AutoInterpretableModel(BaseEstimator): """Automatically fit and select a classifier that is interpretable. Note that all preprocessing should be done beforehand. This is basically a wrapper around GridSearchCV, with some preselected models. """ - PARAM_GRID_DEFAULT = [ + def __init__(self, param_grid=None): + if param_grid is None: + if isinstance(self, ClassifierMixin): + self.param_grid = self.PARAM_GRID_DEFAULT_CLASSIFICATION + elif isinstance(self, RegressorMixin): + self.param_grid = self.PARAM_GRID_DEFAULT_REGRESSION + else: + self.param_grid = param_grid + + def fit(self, X, y, cv=5): + self.pipe_ = Pipeline([("est", BaseEstimator())] + ) # Placeholder Estimator + self.est_ = GridSearchCV( + self.pipe_, self.param_grid, scoring="roc_auc", cv=cv) + self.est_.fit(X, y) + return self + + def predict(self, X): + return self.est_.predict(X) + + def predict_proba(self, X): + return self.est_.predict_proba(X) + + def score(self, X, y): + return self.est_.score(X, y) + + PARAM_GRID_LINEAR_CLASSIFICATION = [ { - "est": [DecisionTreeClassifier()], - "est__max_leaf_nodes": [2, 5, 10], + "est": [ + LogisticRegression( + solver="saga", penalty="elasticnet", max_iter=100) + ], + "est__C": [0.1, 1, 10], + "est__l1_ratio": [0.5, 1], }, { "est": [ - LogisticRegression(solver="saga", penalty="elasticnet", max_iter=100) + Ridge(max_iter=100) ], - "est__C": [0.1, 1, 10], - "est__l1_ratio": [0, 0.5, 1], - # "est__penalty": ["l1", "l2", "elasticnet"], + "est__alpha": [0, 0.1, 1, 10], + }, + ] + + PARAM_GRID_DEFAULT_CLASSIFICATION = [ + { + "est": [DecisionTreeClassifier()], + "est__max_leaf_nodes": [2, 5, 10], }, { "est": [RuleFitClassifier()], @@ -73,28 +118,50 @@ "est": [FIGSClassifier()], "est__max_rules": [5, 10], }, + ] + PARAM_GRID_LINEAR_CLASSIFICATION + + PARAM_GRID_LINEAR_REGRESSION = [ + { + "est": [ + ElasticNet(max_iter=100) + ], + "est__alpha": [0.1, 1, 10], + "est__l1_ratio": [0, 0.5, 1], + }, ] - def __init__(self, param_grid=None): - if param_grid is None: - self.param_grid_ = self.PARAM_GRID_DEFAULT - else: - self.param_grid_ = param_grid + PARAM_GRID_DEFAULT_REGRESSION = [ + { + "est": [DecisionTreeRegressor()], + "est__max_leaf_nodes": [2, 5, 10], + }, + { + "est": [HSTreeRegressor()], + "est__max_leaf_nodes": [5, 10], + }, - def fit(self, X, y, cv=5): - self.pipe_ = Pipeline([("est", BaseEstimator())]) # Placeholder Estimator - self.est_ = GridSearchCV(self.pipe_, self.param_grid_, scoring="roc_auc", cv=cv) - self.est_.fit(X, y) - return self + { + "est": [RuleFitRegressor()], + "est__max_rules": [10, 100], + "est__n_estimators": [20], + }, + { + "est": [TreeGAMRegressor()], + "est__n_boosting_rounds": [10, 
100], + }, + { + "est": [FIGSRegressor()], + "est__max_rules": [5, 10], + }, + ] + PARAM_GRID_LINEAR_REGRESSION - def predict(self, X): - return self.est_.predict(X) - def predict_proba(self, X): - return self.est_.predict_proba(X) +class AutoInterpretableClassifier(AutoInterpretableModel, ClassifierMixin): + ... - def score(self, X, y): - return self.est_.score(X, y) + +class AutoInterpretableRegressor(AutoInterpretableModel, RegressorMixin): + ... if __name__ == "__main__": @@ -106,6 +173,7 @@ ) m = AutoInterpretableClassifier() + # m = AutoInterpretableRegressor() m.fit(X_train, y_train) print("best params", m.est_.best_params_) @@ -135,24 +203,90 @@

    Classes

    Expand source code -
    class AutoInterpretableClassifier(BaseEstimator, ClassifierMixin):
    +
    class AutoInterpretableClassifier(AutoInterpretableModel, ClassifierMixin):
    +    ...
    + +

    Ancestors

    +
      +
    • AutoInterpretableModel
    • +
    • sklearn.base.BaseEstimator
    • +
    • sklearn.utils._metadata_requests._MetadataRequester
    • +
    • sklearn.base.ClassifierMixin
    • +
    +

    Inherited members

    + +
    +
    +class AutoInterpretableModel +(param_grid=None) +
    +
    +

    Automatically fit and select a classifier that is interpretable. +Note that all preprocessing should be done beforehand. +This is basically a wrapper around GridSearchCV, with some preselected models.

    +
    + +Expand source code + +
    class AutoInterpretableModel(BaseEstimator):
         """Automatically fit and select a classifier that is interpretable.
         Note that all preprocessing should be done beforehand.
         This is basically a wrapper around GridSearchCV, with some preselected models.
         """
     
    -    PARAM_GRID_DEFAULT = [
    +    def __init__(self, param_grid=None):
    +        if param_grid is None:
    +            if isinstance(self, ClassifierMixin):
    +                self.param_grid = self.PARAM_GRID_DEFAULT_CLASSIFICATION
    +            elif isinstance(self, RegressorMixin):
    +                self.param_grid = self.PARAM_GRID_DEFAULT_REGRESSION
    +        else:
    +            self.param_grid = param_grid
    +
    +    def fit(self, X, y, cv=5):
    +        self.pipe_ = Pipeline([("est", BaseEstimator())]
    +                              )  # Placeholder Estimator
    +        self.est_ = GridSearchCV(
    +            self.pipe_, self.param_grid, scoring="roc_auc", cv=cv)
    +        self.est_.fit(X, y)
    +        return self
    +
    +    def predict(self, X):
    +        return self.est_.predict(X)
    +
    +    def predict_proba(self, X):
    +        return self.est_.predict_proba(X)
    +
    +    def score(self, X, y):
    +        return self.est_.score(X, y)
    +
    +    PARAM_GRID_LINEAR_CLASSIFICATION = [
             {
    -            "est": [DecisionTreeClassifier()],
    -            "est__max_leaf_nodes": [2, 5, 10],
    +            "est": [
    +                LogisticRegression(
    +                    solver="saga", penalty="elasticnet", max_iter=100)
    +            ],
    +            "est__C": [0.1, 1, 10],
    +            "est__l1_ratio": [0.5, 1],
             },
             {
                 "est": [
    -                LogisticRegression(solver="saga", penalty="elasticnet", max_iter=100)
    +                Ridge(max_iter=100)
                 ],
    -            "est__C": [0.1, 1, 10],
    -            "est__l1_ratio": [0, 0.5, 1],
    -            # "est__penalty": ["l1", "l2", "elasticnet"],
    +            "est__alpha": [0, 0.1, 1, 10],
    +        },
    +    ]
    +
    +    PARAM_GRID_DEFAULT_CLASSIFICATION = [
    +        {
    +            "est": [DecisionTreeClassifier()],
    +            "est__max_leaf_nodes": [2, 5, 10],
             },
             {
                 "est": [RuleFitClassifier()],
    @@ -171,45 +305,75 @@ 

    Classes

    "est": [FIGSClassifier()], "est__max_rules": [5, 10], }, - ] - - def __init__(self, param_grid=None): - if param_grid is None: - self.param_grid_ = self.PARAM_GRID_DEFAULT - else: - self.param_grid_ = param_grid - - def fit(self, X, y, cv=5): - self.pipe_ = Pipeline([("est", BaseEstimator())]) # Placeholder Estimator - self.est_ = GridSearchCV(self.pipe_, self.param_grid_, scoring="roc_auc", cv=cv) - self.est_.fit(X, y) - return self + ] + PARAM_GRID_LINEAR_CLASSIFICATION - def predict(self, X): - return self.est_.predict(X) + PARAM_GRID_LINEAR_REGRESSION = [ + { + "est": [ + ElasticNet(max_iter=100) + ], + "est__alpha": [0.1, 1, 10], + "est__l1_ratio": [0, 0.5, 1], + }, + ] - def predict_proba(self, X): - return self.est_.predict_proba(X) + PARAM_GRID_DEFAULT_REGRESSION = [ + { + "est": [DecisionTreeRegressor()], + "est__max_leaf_nodes": [2, 5, 10], + }, + { + "est": [HSTreeRegressor()], + "est__max_leaf_nodes": [5, 10], + }, - def score(self, X, y): - return self.est_.score(X, y)
    + { + "est": [RuleFitRegressor()], + "est__max_rules": [10, 100], + "est__n_estimators": [20], + }, + { + "est": [TreeGAMRegressor()], + "est__n_boosting_rounds": [10, 100], + }, + { + "est": [FIGSRegressor()], + "est__max_rules": [5, 10], + }, + ] + PARAM_GRID_LINEAR_REGRESSION

    Ancestors

    • sklearn.base.BaseEstimator
    • sklearn.utils._metadata_requests._MetadataRequester
    • -
    • sklearn.base.ClassifierMixin
    • +
    +

    Subclasses

    +

    Class variables

    -
    var PARAM_GRID_DEFAULT
    +
    var PARAM_GRID_DEFAULT_CLASSIFICATION
    +
    +
    +
    +
    var PARAM_GRID_DEFAULT_REGRESSION
    +
    +
    +
    +
    var PARAM_GRID_LINEAR_CLASSIFICATION
    +
    +
    +
    +
    var PARAM_GRID_LINEAR_REGRESSION

    Methods

    -
    +
    def fit(self, X, y, cv=5)
    @@ -219,13 +383,15 @@

    Methods

    Expand source code
    def fit(self, X, y, cv=5):
    -    self.pipe_ = Pipeline([("est", BaseEstimator())])  # Placeholder Estimator
    -    self.est_ = GridSearchCV(self.pipe_, self.param_grid_, scoring="roc_auc", cv=cv)
    +    self.pipe_ = Pipeline([("est", BaseEstimator())]
    +                          )  # Placeholder Estimator
    +    self.est_ = GridSearchCV(
    +        self.pipe_, self.param_grid, scoring="roc_auc", cv=cv)
         self.est_.fit(X, y)
         return self
    -
    +
    def predict(self, X)
    @@ -238,7 +404,7 @@

    Methods

    return self.est_.predict(X)
    -
    +
    def predict_proba(self, X)
    @@ -251,28 +417,11 @@

    Methods

    return self.est_.predict_proba(X)
    -
    +
    def score(self, X, y)
    -

    Return the mean accuracy on the given test data and labels.

    -

    In multi-label classification, this is the subset accuracy -which is a harsh metric since you require for each sample that -each label set be correctly predicted.

    -

    Parameters

    -
    -
    X : array-like of shape (n_samples, n_features)
    -
    Test samples.
    -
    y : array-like of shape (n_samples,) or (n_samples, n_outputs)
    -
    True labels for X.
    -
    sample_weight : array-like of shape (n_samples,), default=None
    -
    Sample weights.
    -
    -

    Returns

    -
    -
    score : float
    -
    Mean accuracy of self.predict(X) w.r.t. y.
    -
    +
    Expand source code @@ -281,8 +430,8 @@

    Returns

    return self.est_.score(X, y)
    -
    -def set_fit_request(self: AutoInterpretableClassifier, *, cv: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> AutoInterpretableClassifier +
    +def set_fit_request(self: AutoInterpretableModel, *, cv: Union[bool, ForwardRef(None), str] = '$UNCHANGED$') ‑> AutoInterpretableModel

    Request metadata passed to the fit method.

    @@ -364,6 +513,37 @@

    Returns

    +
    +class AutoInterpretableRegressor +(param_grid=None) +
    +
    +

    Automatically fit and select a classifier that is interpretable. +Note that all preprocessing should be done beforehand. +This is basically a wrapper around GridSearchCV, with some preselected models.

    +
    + +Expand source code + +
    class AutoInterpretableRegressor(AutoInterpretableModel, RegressorMixin):
    +    ...
    +
    +

    Ancestors

    +
      +
    • AutoInterpretableModel
    • +
    • sklearn.base.BaseEstimator
    • +
    • sklearn.utils._metadata_requests._MetadataRequester
    • +
    • sklearn.base.RegressorMixin
    • +
    +

    Inherited members

    + +
    @@ -382,15 +562,24 @@

    Index 🔍

    diff --git a/imodels/util/automl.py b/imodels/util/automl.py index c790df7c..dcba66ad 100644 --- a/imodels/util/automl.py +++ b/imodels/util/automl.py @@ -5,34 +5,78 @@ TreeGAMClassifier, FIGSClassifier, HSTreeClassifier, + RuleFitRegressor, + TreeGAMRegressor, + FIGSRegressor, + HSTreeRegressor, ) -from sklearn.tree import DecisionTreeClassifier -from sklearn.linear_model import LogisticRegression +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor +from sklearn.linear_model import LogisticRegression, ElasticNet, Ridge import imodels from sklearn.model_selection import GridSearchCV, train_test_split import numpy as np from sklearn.pipeline import Pipeline +# PARAM_GRID_LINEAR = [ +# { +# "est": [] +# } +# ] -class AutoInterpretableClassifier(BaseEstimator, ClassifierMixin): + +class AutoInterpretableModel(BaseEstimator): """Automatically fit and select a classifier that is interpretable. Note that all preprocessing should be done beforehand. This is basically a wrapper around GridSearchCV, with some preselected models. """ - PARAM_GRID_DEFAULT = [ - { - "est": [DecisionTreeClassifier()], - "est__max_leaf_nodes": [2, 5, 10], - }, + def __init__(self, param_grid=None): + if param_grid is None: + if isinstance(self, ClassifierMixin): + self.param_grid = self.PARAM_GRID_DEFAULT_CLASSIFICATION + elif isinstance(self, RegressorMixin): + self.param_grid = self.PARAM_GRID_DEFAULT_REGRESSION + else: + self.param_grid = param_grid + + def fit(self, X, y, cv=5): + self.pipe_ = Pipeline([("est", BaseEstimator())] + ) # Placeholder Estimator + self.est_ = GridSearchCV( + self.pipe_, self.param_grid, scoring="roc_auc", cv=cv) + self.est_.fit(X, y) + return self + + def predict(self, X): + return self.est_.predict(X) + + def predict_proba(self, X): + return self.est_.predict_proba(X) + + def score(self, X, y): + return self.est_.score(X, y) + + PARAM_GRID_LINEAR_CLASSIFICATION = [ { "est": [ LogisticRegression( solver="saga", penalty="elasticnet", max_iter=100) ], "est__C": [0.1, 1, 10], - "est__l1_ratio": [0, 0.5, 1], - # "est__penalty": ["l1", "l2", "elasticnet"], + "est__l1_ratio": [0.5, 1], + }, + { + "est": [ + Ridge(max_iter=100) + ], + "est__alpha": [0, 0.1, 1, 10], + }, + ] + + PARAM_GRID_DEFAULT_CLASSIFICATION = [ + { + "est": [DecisionTreeClassifier()], + "est__max_leaf_nodes": [2, 5, 10], }, { "est": [RuleFitClassifier()], @@ -51,30 +95,50 @@ class AutoInterpretableClassifier(BaseEstimator, ClassifierMixin): "est": [FIGSClassifier()], "est__max_rules": [5, 10], }, + ] + PARAM_GRID_LINEAR_CLASSIFICATION + + PARAM_GRID_LINEAR_REGRESSION = [ + { + "est": [ + ElasticNet(max_iter=100) + ], + "est__alpha": [0.1, 1, 10], + "est__l1_ratio": [0, 0.5, 1], + }, ] - def __init__(self, param_grid=None): - if param_grid is None: - self.param_grid = self.PARAM_GRID_DEFAULT - else: - self.param_grid = param_grid + PARAM_GRID_DEFAULT_REGRESSION = [ + { + "est": [DecisionTreeRegressor()], + "est__max_leaf_nodes": [2, 5, 10], + }, + { + "est": [HSTreeRegressor()], + "est__max_leaf_nodes": [5, 10], + }, - def fit(self, X, y, cv=5): - self.pipe_ = Pipeline([("est", BaseEstimator())] - ) # Placeholder Estimator - self.est_ = GridSearchCV( - self.pipe_, self.param_grid, scoring="roc_auc", cv=cv) - self.est_.fit(X, y) - return self + { + "est": [RuleFitRegressor()], + "est__max_rules": [10, 100], + "est__n_estimators": [20], + }, + { + "est": [TreeGAMRegressor()], + "est__n_boosting_rounds": [10, 100], + }, + { + "est": [FIGSRegressor()], + "est__max_rules": [5, 10], + }, + ] + 
PARAM_GRID_LINEAR_REGRESSION - def predict(self, X): - return self.est_.predict(X) - def predict_proba(self, X): - return self.est_.predict_proba(X) +class AutoInterpretableClassifier(AutoInterpretableModel, ClassifierMixin): + ... - def score(self, X, y): - return self.est_.score(X, y) + +class AutoInterpretableRegressor(AutoInterpretableModel, RegressorMixin): + ... if __name__ == "__main__": @@ -86,6 +150,7 @@ def score(self, X, y): ) m = AutoInterpretableClassifier() + # m = AutoInterpretableRegressor() m.fit(X_train, y_train) print("best params", m.est_.best_params_) diff --git a/readme.md b/readme.md index 4551a95c..864bb247 100644 --- a/readme.md +++ b/readme.md @@ -180,7 +180,7 @@ Different models support different machine-learning tasks. Current support for d | Greedy tree sums (FIGS) | [FIGSClassifier](https://csinva.io/imodels/tree/figs.html#imodels.tree.figs.FIGSClassifier) | [FIGSRegressor](https://csinva.io/imodels/tree/figs.html#imodels.tree.figs.FIGSRegressor) | | | Hierarchical shrinkage | [HSTreeClassifierCV](https://csinva.io/imodels/tree/hierarchical_shrinkage.html#imodels.tree.hierarchical_shrinkage.HSTreeClassifierCV) | [HSTreeRegressorCV](https://csinva.io/imodels/tree/hierarchical_shrinkage.html#imodels.tree.hierarchical_shrinkage.HSTreeRegressorCV) | Wraps any sklearn tree-based model | | Distillation | | [DistilledRegressor](https://csinva.io/imodels/util/distillation.html#imodels.util.distillation.DistilledRegressor) | Wraps any sklearn-compatible models | -| AutoML model | [AutoInterpretableClassifier️](https://csinva.io/imodels/util/automl.html) | | | +| AutoML model | [AutoInterpretableClassifier️](https://csinva.io/imodels/util/automl.html) | [AutoInterpretableRegressor️](https://csinva.io/imodels/util/automl.html) | | ### Extras diff --git a/setup.py b/setup.py index b2f13255..7c3f11f9 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ setuptools.setup( name="imodels", - version="1.4.0", + version="1.4.1", author="Chandan Singh, Keyan Nasseri, Matthew Epland, Yan Shuo Tan, Omer Ronen, Tiffany Tang, Abhineet Agarwal, Theo Saarinen, Bin Yu, and others", author_email="chandan_singh@berkeley.edu", description="Implementations of various interpretable models",