From c18078ecb919597bdaaae0ca4d521093274f02f2 Mon Sep 17 00:00:00 2001
From: Chandan Singh
Date: Tue, 3 Jan 2023 11:34:32 -0500
Subject: [PATCH] Regenerate docs
---
docs/discretization/discretizer.html | 20 +-
docs/discretization/index.html | 2 +-
docs/discretization/mdlp.html | 8 +-
docs/experimental/bartpy/data.html | 4 +-
.../bartpy/diagnostics/diagnostics.html | 14 +-
.../bartpy/diagnostics/motivation.html | 4 +-
.../bartpy/diagnostics/residuals.html | 10 +-
.../bartpy/diagnostics/sampling.html | 10 +-
.../initializers/sklearntreeinitializer.html | 22 +-
docs/experimental/bartpy/model.html | 8 +-
.../bartpy/samplers/leafnode.html | 30 +-
.../bartpy/samplers/oblivioustrees/index.html | 4 +-
.../oblivioustrees/likelihoodratio.html | 562 ++++++++++++++
.../samplers/oblivioustrees/treemutation.html | 52 +-
.../bartpy/samplers/treemutation.html | 126 +--
.../samplers/unconstrainedtree/index.html | 4 +-
.../unconstrainedtree/likelihoodratio.html | 721 ++++++++++++++++++
.../unconstrainedtree/treemutation.html | 52 +-
docs/experimental/bartpy/sklearnmodel.html | 12 +-
docs/experimental/figs_ensembles.html | 4 +-
.../bayesian_rule_list/brl_util.html | 48 +-
docs/rule_set/brs.html | 12 +-
docs/rule_set/slipper_util.html | 32 +-
docs/tree/cart_ccp.html | 4 +-
docs/tree/gosdt/pygosdt.html | 8 +-
docs/tree/gosdt/pygosdt_helper.html | 28 +-
docs/tree/hierarchical_shrinkage.html | 127 ++-
docs/util/neural_nets.html | 4 +-
28 files changed, 1649 insertions(+), 283 deletions(-)
create mode 100644 docs/experimental/bartpy/samplers/oblivioustrees/likelihoodratio.html
create mode 100644 docs/experimental/bartpy/samplers/unconstrainedtree/likelihoodratio.html
diff --git a/docs/discretization/discretizer.html b/docs/discretization/discretizer.html
index 40426bb1..36f0bec7 100644
--- a/docs/discretization/discretizer.html
+++ b/docs/discretization/discretizer.html
@@ -183,7 +183,7 @@
max values for the range of x
keep_pointwise_bins : boolean
- If True, treat duplicate bin_edges as a pointiwse bin,
+ If True, treat duplicate bin_edges as a pointwise bin,
i.e., [a, a]. If False, these bins are in effect ignored.
Returns
@@ -480,7 +480,7 @@
manual_discretizer_ : dictionary
Provides bin_edges to feed into _quantile_discretization()
- and do quantile discreization manually for features where
+ and do quantile discretization manually for features where
KBinsDiscretizer() failed. Ignored if strategy != 'quantile'
or no errors in KBinsDiscretizer().
@@ -515,7 +515,7 @@
self
"""
- # initalization and error checking
+ # initialization and error checking
self._fit_preprocessing(X)
# apply KBinsDiscretizer to the selected columns
@@ -734,7 +734,7 @@
Parameters
----------
- X : data frame of shape (n_samples, n_fatures)
+ X : data frame of shape (n_samples, n_features)
Training data used to fit RF
y : array-like of shape (n_samples,)
@@ -1119,7 +1119,7 @@
Params
max values for the range of x
keep_pointwise_bins : boolean
- If True, treat duplicate bin_edges as a pointiwse bin,
+ If True, treat duplicate bin_edges as a pointwise bin,
i.e., [a, a]. If False, these bins are in effect ignored.
Returns
@@ -1287,7 +1287,7 @@
Attributes
Primary discretization method used to bin numeric data
manual_discretizer_ : dictionary
Provides bin_edges to feed into _quantile_discretization()
-and do quantile discreization manually for features where
+and do quantile discretization manually for features where
KBinsDiscretizer() failed. Ignored if strategy != 'quantile'
or no errors in KBinsDiscretizer().
onehot_ : object of class OneHotEncoder()
@@ -1352,7 +1352,7 @@
Examples
manual_discretizer_ : dictionary
Provides bin_edges to feed into _quantile_discretization()
- and do quantile discreization manually for features where
+ and do quantile discretization manually for features where
KBinsDiscretizer() failed. Ignored if strategy != 'quantile'
or no errors in KBinsDiscretizer().
@@ -1387,7 +1387,7 @@
Examples
self
"""
- # initalization and error checking
+ # initialization and error checking
self._fit_preprocessing(X)
# apply KBinsDiscretizer to the selected columns
@@ -1509,7 +1509,7 @@
Returns
self
"""
- # initalization and error checking
+ # initialization and error checking
self._fit_preprocessing(X)
# apply KBinsDiscretizer to the selected columns
@@ -2135,7 +2135,7 @@
Attributes
Parameters
----------
- X : data frame of shape (n_samples, n_fatures)
+ X : data frame of shape (n_samples, n_features)
Training data used to fit RF
y : array-like of shape (n_samples,)
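
The manual quantile fallback documented above can be sketched with plain sklearn/numpy calls. A minimal illustration, assuming toy data; the variable names are illustrative, not imodels internals:

    import numpy as np
    from sklearn.preprocessing import KBinsDiscretizer

    X = np.random.rand(100, 1)  # toy numeric column
    try:
        disc = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy='quantile')
        X_binned = disc.fit_transform(X)
    except ValueError:
        # manual quantile fallback, in the spirit of manual_discretizer_
        bin_edges = np.quantile(X[:, 0], np.linspace(0, 1, 5))
        X_binned = np.digitize(X[:, 0], bin_edges[1:-1])
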
diff --git a/docs/discretization/index.html b/docs/discretization/index.html
index 6ee76721..a4fbbaec 100644
--- a/docs/discretization/index.html
+++ b/docs/discretization/index.html
@@ -36,7 +36,7 @@
-Python implementation of Fayyad and Irani's MDLP criterion discretiation algorithm
+Python implementation of Fayyad and Irani's MDLP criterion discretization algorithm
Reference:
Irani, Keki B. "Multi-interval discretization of continuous-valued attributes for classification learning." (1993).
@@ -27,7 +27,7 @@
Discretization MDLP
'''
# Discretization MDLP
-Python implementation of Fayyad and Irani's MDLP criterion discretiation algorithm
+Python implementation of Fayyad and Irani's MDLP criterion discretization algorithm
**Reference:**
Irani, Keki B. "Multi-interval discretization of continuous-valued attributes for classification learning." (1993).
@@ -138,7 +138,7 @@
Discretization MDLP
'''
Given an attribute, find all potential cut_points (boundary points)
:param feature: feature of interest
- :param partition_index: indices of rows for which feature value falls whithin interval of interest
+ :param partition_index: indices of rows for which feature value falls within interval of interest
:return: array with potential cut_points
'''
# get dataframe with only rows of interest, and feature and class columns
@@ -839,7 +839,7 @@
Params
'''
Given an attribute, find all potential cut_points (boundary points)
:param feature: feature of interest
- :param partition_index: indices of rows for which feature value falls whithin interval of interest
+ :param partition_index: indices of rows for which feature value falls within interval of interest
:return: array with potential cut_points
'''
# get dataframe with only rows of interest, and feature and class columns
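
The cut-point search described above can be sketched as follows — a simplified reading of Fayyad & Irani's boundary-point definition, not the imodels implementation, which operates on a dataframe partition:

    import numpy as np

    def potential_cut_points(x, y):
        # midpoints between adjacent sorted feature values whose labels differ
        order = np.argsort(x)
        xs, ys = x[order], y[order]
        cuts = [(xs[i] + xs[i + 1]) / 2.0
                for i in range(len(xs) - 1)
                if xs[i] != xs[i + 1] and ys[i] != ys[i + 1]]
        return np.array(cuts)
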
diff --git a/docs/experimental/bartpy/data.html b/docs/experimental/bartpy/data.html
index 3f236b37..f94693a0 100644
--- a/docs/experimental/bartpy/data.html
+++ b/docs/experimental/bartpy/data.html
@@ -99,7 +99,7 @@
self._n_features = X.shape[1]
self._mask = mask
- # Cache iniialization
+ # Cache initialization
if unique_columns is not None:
self._unique_columns = [x if x is True else None for x in unique_columns]
else:
@@ -524,7 +524,7 @@
Classes
self._n_features = X.shape[1]
self._mask = mask
- # Cache iniialization
+ # Cache initialization
if unique_columns is not None:
self._unique_columns = [x if x is True else None for x in unique_columns]
else:
diff --git a/docs/experimental/bartpy/diagnostics/diagnostics.html b/docs/experimental/bartpy/diagnostics/diagnostics.html
index 3def619c..3d21593b 100644
--- a/docs/experimental/bartpy/diagnostics/diagnostics.html
+++ b/docs/experimental/bartpy/diagnostics/diagnostics.html
@@ -30,8 +30,8 @@
from sklearn.metrics import mean_squared_error
from imodels.util.tree_interaction_utils import get_interacting_features
-from ..diagnostics.residuals import plot_qq, plot_homoskedasity_diagnostics
-from ..diagnostics.sampling import plot_tree_mutation_acceptance_rate, plot_tree_likelihhod, plot_tree_probs
+from ..diagnostics.residuals import plot_qq, plot_homoscedasticity_diagnostics
+from ..diagnostics.sampling import plot_tree_mutation_acceptance_rate, plot_tree_likelihood, plot_tree_probs
from ..diagnostics.sigma import plot_sigma_convergence
from ..diagnostics.trees import plot_tree_depth
from ..initializers.sklearntreeinitializer import SklearnTreeInitializer
@@ -44,9 +44,9 @@
plot_qq(model, ax1)
plot_tree_depth(model, ax2)
plot_sigma_convergence(model, ax3)
- plot_homoskedasity_diagnostics(model, ax4)
+ plot_homoscedasticity_diagnostics(model, ax4)
plot_tree_mutation_acceptance_rate(model, ax5)
- # plot_tree_likelihhod(model, ax6)
+ # plot_tree_likelihood(model, ax6)
# plot_tree_probs(model, ax7)
plt.show()
@@ -108,7 +108,7 @@
# plot_tree_depth(bart_figs, ax2,
# f"FIGS initialization (MSE: {np.round(mean_squared_error(bart_figs_preds, y_test), 4)}"
# f", FIGS MSE: {np.round(mean_squared_error(figs_preds, y_test), 2)})", x_label=True)
- # plt.title(f"Bayesian tree with different initilization of Friedman 1 dataset n={n}")
+ # plt.title(f"Bayesian tree with different initialization of Friedman 1 dataset n={n}")
plt.show()
@@ -138,9 +138,9 @@
The log probability of the particular grow mutation being selected conditional on growing a given tree
+i.e.
+log(P(mutation | node) P(node | tree))
+
+
+Expand source code
+
+
def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) -> float:
+ """
+ The log probability of the particular grow mutation being selected conditional on growing a given tree
+ i.e.
+ log(P(mutation | node) P(node | tree))
+
+ """
+ prob_split_chosen = log_probability_split_within_node(mutation)
+ return prob_split_chosen
A sampler for tree mutation space.
Responsible for producing samples of ways to mutate a tree within a model
-Works by combining a proposer and likihood evaluator into:
+Works by combining a proposer and likelihood evaluator into:
- propose a mutation
-- assess likihood
-- accept if likihood higher than a uniform(0, 1) draw
+- assess likelihood
+- accept if likelihood higher than a uniform(0, 1) draw
Parameters
proposer : TreeMutationProposer
-likihood_ratio : TreeMutationLikihoodRatio
+likelihood_ratio : TreeMutationLikelihoodRatio
@@ -138,28 +138,28 @@
Parameters
A sampler for tree mutation space.
Responsible for producing samples of ways to mutate a tree within a model
- Works by combining a proposer and likihood evaluator into:
+ Works by combining a proposer and likelihood evaluator into:
- propose a mutation
- - assess likihood
- - accept if likihood higher than a uniform(0, 1) draw
+ - assess likelihood
+ - accept if likelihood higher than a uniform(0, 1) draw
Parameters
----------
proposer: TreeMutationProposer
- likihood_ratio: TreeMutationLikihoodRatio
+ likelihood_ratio: TreeMutationLikelihoodRatio
"""
def __init__(self,
proposer: TreeMutationProposer,
- likihood_ratio: TreeMutationLikihoodRatio,
+ likelihood_ratio: TreeMutationLikelihoodRatio,
scalar_sampler=UniformScalarSampler()):
self.proposer = proposer
- self.likihood_ratio = likihood_ratio
+ self.likelihood_ratio = likelihood_ratio
self._scalar_sampler = scalar_sampler
def sample(self, model: Model, tree: Tree) -> Optional[List[TreeMutation]]:
proposals: List[TreeMutation] = self.proposer.propose(tree)
- ratio = np.sum([self.likihood_ratio.log_probability_ratio(model, tree, x) for x in proposals])
+ ratio = np.sum([self.likelihood_ratio.log_probability_ratio(model, tree, x) for x in proposals])
if self._scalar_sampler.sample() < ratio:
return proposals
else:
@@ -190,7 +190,7 @@
Methods
def sample(self, model: Model, tree: Tree) -> Optional[List[TreeMutation]]:
proposals: List[TreeMutation] = self.proposer.propose(tree)
- ratio = np.sum([self.likihood_ratio.log_probability_ratio(model, tree, x) for x in proposals])
+ ratio = np.sum([self.likelihood_ratio.log_probability_ratio(model, tree, x) for x in proposals])
if self._scalar_sampler.sample() < ratio:
return proposals
else:
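
The sample() method above follows a Metropolis-Hastings pattern: propose a mutation, evaluate a logged acceptance ratio, accept or reject against a uniform draw. A self-contained sketch of that step, where propose and log_acceptance_ratio stand in for the proposer and likelihood-ratio objects (note that bartpy compares the uniform draw to the ratio directly, whereas the textbook form below compares log(u) to the log ratio):

    import numpy as np

    def mh_tree_step(tree, propose, log_acceptance_ratio, rng=None):
        # propose a mutation, then accept it with probability min(1, exp(ratio))
        rng = rng or np.random.default_rng()
        mutation = propose(tree)                      # e.g. a grow or prune move
        ratio = log_acceptance_ratio(tree, mutation)  # logged MH ratio
        if np.log(rng.uniform()) < ratio:
            return mutation   # accepted: caller applies it to the tree
        return None           # rejected: keep the current tree
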
diff --git a/docs/experimental/bartpy/samplers/treemutation.html b/docs/experimental/bartpy/samplers/treemutation.html
index 68ac4741..36c0e056 100644
--- a/docs/experimental/bartpy/samplers/treemutation.html
+++ b/docs/experimental/bartpy/samplers/treemutation.html
@@ -37,10 +37,10 @@
A sampler for tree mutation space.
Responsible for producing samples of ways to mutate a tree within a model
- A general schema of implementation is to combine a proposer and likihood evaluator to:
+ A general schema of implementation is to combine a proposer and likelihood evaluator to:
- propose a mutation
- - assess likihood
- - accept if likihood higher than a uniform(0, 1) draw
+ - assess likelihood
+ - accept if likelihood higher than a uniform(0, 1) draw
"""
def sample(self, model: Model, tree: Tree) -> Optional[TreeMutation]:
@@ -74,14 +74,14 @@
raise NotImplementedError()
-class TreeMutationLikihoodRatio(ABC):
+class TreeMutationLikelihoodRatio(ABC):
"""
Responsible for evaluating the ratio of mutations to the reverse movement
"""
def log_probability_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> Tuple[float, tuple, tuple]:
"""
- Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement
+ Calculates the ratio of the likelihood of a mutation over the likelihood of the reverse movement
Main access point for the class
@@ -99,7 +99,7 @@
float
logged ratio of likelihoods
"""
- log_likelihood_ratio, (l_new, l_old) = self.log_likihood_ratio(model, tree, mutation)
+ log_likelihood_ratio, (l_new, l_old) = self.log_likelihood_ratio(model, tree, mutation)
log_transition_ratio, (t_new, t_old) = self.log_transition_ratio(tree, mutation)
log_prior_ratio, (p_new, p_old) = self.log_tree_ratio(model, tree, mutation)
bayes_term = log_transition_ratio + log_prior_ratio
@@ -114,9 +114,9 @@
@abstractmethod
def log_transition_ratio(self, tree: Tree, mutation: TreeMutation) -> float:
"""
- The logged ratio of the likihood of making the transition to the likihood of making the reverse transition.
+ The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition.
e.g. in the case of using only grow and prune mutations:
- log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree)
+ log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree)
Parameters
----------
@@ -128,14 +128,14 @@
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
@abstractmethod
def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> float:
"""
- Logged ratio of the likihood of the tree before and after the mutation
+ Logged ratio of the likelihood of the tree before and after the mutation
i.e. the product of the probability of all split nodes being split and all leaf nodes not being split
Parameters
@@ -150,15 +150,15 @@
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
@abstractmethod
- def log_likihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
+ def log_likelihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
"""
- The logged ratio of the likihood of all the data points before and after the mutation
+ The logged ratio of the likelihood of all the data points before and after the mutation
Generally more complex trees should be able to fit the data better than simple trees
Parameters
@@ -173,7 +173,7 @@
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
@@ -187,8 +187,8 @@
Classes
-
-class TreeMutationLikihoodRatio
+
+class TreeMutationLikelihoodRatio
Responsible for evaluating the ratio of mutations to the reverse movement
@@ -196,14 +196,14 @@
Classes
Expand source code
-class TreeMutationLikihoodRatio(ABC):
+class TreeMutationLikelihoodRatio(ABC):
"""
Responsible for evaluating the ratio of mutations to the reverse movement
"""
def log_probability_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> Tuple[float, tuple, tuple]:
"""
- Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement
+ Calculates the ratio of the likelihood of a mutation over the likelihood of the reverse movement
Main access point for the class
@@ -221,7 +221,7 @@
@abstractmethod
def log_transition_ratio(self, tree: Tree, mutation: TreeMutation) -> float:
"""
- The logged ratio of the likihood of making the transition to the likihood of making the reverse transition.
+ The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition.
e.g. in the case of using only grow and prune mutations:
- log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree)
+ log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree)
Parameters
----------
@@ -250,14 +250,14 @@
Classes
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
@abstractmethod
def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> float:
"""
- Logged ratio of the likihood of the tree before and after the mutation
+ Logged ratio of the likelihood of the tree before and after the mutation
i.e. the product of the probability of all split nodes being split and all leaf nodes not being split
Parameters
@@ -272,15 +272,15 @@
Classes
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
@abstractmethod
- def log_likihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
+ def log_likelihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
"""
- The logged ratio of the likihood of all the data points before and after the mutation
+ The logged ratio of the likelihood of all the data points before and after the mutation
Generally more complex trees should be able to fit the data better than simple trees
Parameters
@@ -295,7 +295,7 @@
Classes
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
-The logged ratio of the likihood of all the data points before and after the mutation
+The logged ratio of the likelihood of all the data points before and after the mutation
Generally more complex trees should be able to fit the data better than simple trees
Parameters
@@ -328,16 +328,16 @@
Parameters
Returns
float
-logged likihood ratio
+logged likelihood ratio
Expand source code
@abstractmethod
-def log_likihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
+def log_likelihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
"""
- The logged ratio of the likihood of all the data points before and after the mutation
+ The logged ratio of the likelihood of all the data points before and after the mutation
Generally more complex trees should be able to fit the data better than simple trees
Parameters
@@ -352,16 +352,16 @@
Returns
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
-Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement
+Calculates the ratio of the likelihood of a mutation over the likelihood of the reverse movement
Main access point for the class
Parameters
@@ -383,7 +383,7 @@
Returns
def log_probability_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> Tuple[float, tuple, tuple]:
"""
- Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement
+ Calculates the ratio of the likelihood of a mutation over the likelihood of the reverse movement
Main access point for the class
@@ -401,7 +401,7 @@
-The logged ratio of the likihood of making the transition to the likihood of making the reverse transition.
+The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition.
e.g. in the case of using only grow and prune mutations:
-log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree)
+log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree)
Parameters
tree : Tree
@@ -431,7 +431,7 @@
Parameters
Returns
float
-logged likihood ratio
+logged likelihood ratio
@@ -440,9 +440,9 @@
Returns
@abstractmethod
def log_transition_ratio(self, tree: Tree, mutation: TreeMutation) -> float:
"""
- The logged ratio of the likihood of making the transition to the likihood of making the reverse transition.
+ The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition.
e.g. in the case of using only grow and prune mutations:
- log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree)
+ log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree)
Parameters
----------
@@ -454,16 +454,16 @@
Returns
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
-Logged ratio of the likihood of the tree before and after the mutation
+Logged ratio of the likelihood of the tree before and after the mutation
i.e. the product of the probability of all split nodes being split and all leaf nodes not being split
Parameters
@@ -477,7 +477,7 @@
Parameters
Returns
float
-logged likihood ratio
+logged likelihood ratio
@@ -486,7 +486,7 @@
Returns
@abstractmethod
def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> float:
"""
- Logged ratio of the likihood of the tree before and after the mutation
+ Logged ratio of the likelihood of the tree before and after the mutation
i.e. the product of the probability of all split nodes being split and all leaf nodes not being split
Parameters
@@ -501,7 +501,7 @@
Returns
Returns
-------
float
- logged likihood ratio
+ logged likelihood ratio
"""
raise NotImplementedError()
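
The log_tree_ratio docstring above refers to the product over split nodes of the split probability and over leaves of the non-split probability. In the standard BART prior (Chipman et al.), a node at depth d splits with probability alpha * (1 + d) ** -beta; a hedged sketch of that quantity, with the conventional alpha/beta defaults rather than values from this diff:

    import numpy as np

    def log_tree_prior(split_depths, leaf_depths, alpha=0.95, beta=2.0):
        # sum of log p(split) over split nodes plus log(1 - p(split)) over leaves
        p_split = lambda d: alpha * (1.0 + d) ** (-beta)
        return (sum(np.log(p_split(d)) for d in split_depths) +
                sum(np.log(1.0 - p_split(d)) for d in leaf_depths))
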
@@ -598,10 +598,10 @@
Returns
A sampler for tree mutation space.
Responsible for producing samples of ways to mutate a tree within a model
-A general schema of implementation is to combine a proposer and likihood evaluator to:
+A general schema of implementation is to combine a proposer and likelihood evaluator to:
- propose a mutation
-- assess likihood
-- accept if likihood higher than a uniform(0, 1) draw
+- assess likelihood
+- accept if likelihood higher than a uniform(0, 1) draw
Expand source code
@@ -611,10 +611,10 @@
Returns
A sampler for tree mutation space.
Responsible for producing samples of ways to mutate a tree within a model
- A general schema of implementation is to combine a proposer and likihood evaluator to:
+ A general schema of implementation is to combine a proposer and likelihood evaluator to:
- propose a mutation
- - assess likihood
- - accept if likihood higher than a uniform(0, 1) draw
+ - assess likelihood
+ - accept if likelihood higher than a uniform(0, 1) draw
"""
def sample(self, model: Model, tree: Tree) -> Optional[TreeMutation]:
@@ -675,12 +675,12 @@
The log probability of the particular grow mutation being selected conditional on growing a given tree
+i.e.
+log(P(mutation | node) P(node | tree))
+
+
+Expand source code
+
+
def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) -> float:
+ """
+ The log probability of the particular grow mutation being selected conditional on growing a given tree
+ i.e.
+ log(P(mutation | node) P(node | tree))
+
+ """
+ prob_node_chosen_to_split_on = - np.log(n_splittable_leaf_nodes(tree))
+ prob_split_chosen = log_probability_split_within_node(mutation)
+ return prob_node_chosen_to_split_on + prob_split_chosen
+
+
+
+def n_prunable_decision_nodes(tree: Tree) ‑> int
+
+
+
The number of prunable decision nodes
+i.e. how many decision nodes have two leaf children
+
+
+Expand source code
+
+
def n_prunable_decision_nodes(tree: Tree) -> int:
+ """
+ The number of prunable decision nodes
+ i.e. how many decision nodes have two leaf children
+ """
+ return len(tree.prunable_decision_nodes)
The number of splittable leaf nodes
+i.e. how many leaf nodes have more than one distinct value in their covariate matrix
+
+
+Expand source code
+
+
def n_splittable_leaf_nodes(tree: Tree) -> int:
+ """
+ The number of splittable leaf nodes
+ i.e. how many leaf nodes have more than one distinct value in their covariate matrix
+ """
+ return len(tree.splittable_leaf_nodes)
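
log_probability_split_within_tree above decomposes the grow proposal as: pick a splittable leaf uniformly, then pick a split within it. Assuming splits within a node are also chosen uniformly over candidate (feature, value) pairs, the whole term reduces to:

    import numpy as np

    def log_prob_grow(n_splittable_leaves, n_candidate_splits):
        # log P(mutation | tree) = log P(node | tree) + log P(split | node)
        return -np.log(n_splittable_leaves) - np.log(n_candidate_splits)
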
A sampler for tree mutation space.
Responsible for producing samples of ways to mutate a tree within a model
-Works by combining a proposer and likihood evaluator into:
+Works by combining a proposer and likelihood evaluator into:
- propose a mutation
-- assess likihood
-- accept if likihood higher than a uniform(0, 1) draw
+- assess likelihood
+- accept if likelihood higher than a uniform(0, 1) draw
Parameters
proposer : TreeMutationProposer
-likihood_ratio : TreeMutationLikihoodRatio
+likelihood_ratio : TreeMutationLikelihoodRatio
@@ -137,28 +137,28 @@
Parameters
A sampler for tree mutation space.
Responsible for producing samples of ways to mutate a tree within a model
- Works by combining a proposer and likihood evaluator into:
+ Works by combining a proposer and likelihood evaluator into:
- propose a mutation
- - assess likihood
- - accept if likihood higher than a uniform(0, 1) draw
+ - assess likelihood
+ - accept if likelihood higher than a uniform(0, 1) draw
Parameters
----------
proposer: TreeMutationProposer
- likihood_ratio: TreeMutationLikihoodRatio
+ likelihood_ratio: TreeMutationLikelihoodRatio
"""
def __init__(self,
proposer: TreeMutationProposer,
- likihood_ratio: TreeMutationLikihoodRatio,
+ likelihood_ratio: TreeMutationLikelihoodRatio,
scalar_sampler=UniformScalarSampler()):
self.proposer = proposer
- self.likihood_ratio = likihood_ratio
+ self.likelihood_ratio = likelihood_ratio
self._scalar_sampler = scalar_sampler
def sample(self, model: Model, tree: Tree) -> (Optional[TreeMutation], float):
proposal = self.proposer.propose(tree)
- ratio, (l_new, l_old), (prob_new, prob_old) = self.likihood_ratio.log_probability_ratio(model, tree, proposal)
+ ratio, (l_new, l_old), (prob_new, prob_old) = self.likelihood_ratio.log_probability_ratio(model, tree, proposal)
if self._scalar_sampler.sample() < ratio:
return proposal, np.exp(l_new) - np.exp(l_old), np.exp(prob_new) - np.exp(prob_old)
else:
@@ -188,7 +188,7 @@
diff --git a/docs/experimental/figs_ensembles.html b/docs/experimental/figs_ensembles.html
index 05cf5001..82772944 100644
--- a/docs/experimental/figs_ensembles.html
+++ b/docs/experimental/figs_ensembles.html
@@ -153,7 +153,7 @@
def _init_decision_function(self):
"""Sets decision function based on prediction_task
"""
- # used by sklearn GrriidSearchCV, BaggingClassifier
+ # used by sklearn GridSearchCV, BaggingClassifier
if self.prediction_task == 'classification':
decision_function = lambda x: self.predict_proba(x)[:, 1]
elif self.prediction_task == 'regression':
@@ -632,7 +632,7 @@
Classes
def _init_decision_function(self):
"""Sets decision function based on prediction_task
"""
- # used by sklearn GrriidSearchCV, BaggingClassifier
+ # used by sklearn GridSearchCV, BaggingClassifier
if self.prediction_task == 'classification':
decision_function = lambda x: self.predict_proba(x)[:, 1]
elif self.prediction_task == 'regression':
diff --git a/docs/rule_list/bayesian_rule_list/brl_util.html b/docs/rule_list/bayesian_rule_list/brl_util.html
index 80f699cd..32dd4506 100644
--- a/docs/rule_list/bayesian_rule_list/brl_util.html
+++ b/docs/rule_list/bayesian_rule_list/brl_util.html
@@ -62,7 +62,7 @@
# dictionary whose keys are a string Pickle-dump of the antecedent list d, and
# whose values are a list [a,b] where a is (proportional to) the log posterior of
# d, and b is the number of times d is present in the MCMC samples.
-# - d_star - the BRL-point antecedent list. A list of indicies corresponding to
+# - d_star - the BRL-point antecedent list. A list of indices corresponding to
# variable "itemsets."
# - itemsets - A list of itemsets. itemsets[d_star[i]] is the antecedent in
# position i on the BRL-point list
@@ -248,7 +248,7 @@
minrulesize = int(floor(avgrulesize))
maxrulesize = int(ceil(avgrulesize))
# Run through all perms again
- likelihds = []
+ likelihoods = []
d_ts = []
beta_Z, logalpha_pmf, logbeta_pmf = prior_calculations(lbda, len(X), eta,
maxlhs) # get the constants needed to compute the prior
@@ -267,11 +267,11 @@
# Compute the likelihood
R_t = d_t.index(0)
N_t = compute_rule_usage(d_t, R_t, X, Y)
- likelihds.append(
+ likelihoods.append(
fn_logposterior(d_t, R_t, N_t, alpha, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen,
lhs_len))
- likelihds = array(likelihds)
- d_star = d_ts[likelihds.argmax()]
+ likelihoods = array(likelihoods)
+ d_star = d_ts[likelihoods.argmax()]
except RuntimeWarning:
# This can happen if all perms are identically [0], or if no soln is found within the len and width bounds (probably the chains didn't converge)
print('No suitable point estimate found')
@@ -450,7 +450,7 @@
move_probs = array(move_probs_default)
Jratios = array([1., move_probs[2] / float(move_probs[1]), move_probs[1] / float(move_probs[2])])
u = random.random()
- # First we will find the indicies for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location
+ # First we will find the indices for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location
if u < sum(move_probs[:1]):
# This is an on-list move.
step = 'move'
@@ -509,18 +509,18 @@
def fn_logposterior(d_t, R_t, N_t, alpha, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len):
'''# Compute log posterior
'''
- logliklihood = fn_logliklihood(d_t, N_t, R_t, alpha)
+ loglikelihood = fn_loglikelihood(d_t, N_t, R_t, alpha)
logprior = fn_logprior(d_t, R_t, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len)
- return logliklihood + logprior
+ return loglikelihood + logprior
-def fn_logliklihood(d_t, N_t, R_t, alpha):
+def fn_loglikelihood(d_t, N_t, R_t, alpha):
'''Compute log likelihood
'''
gammaln_Nt_jk = gammaln(N_t + alpha)
gammaln_Nt_j = gammaln(sum(N_t + alpha, 1))
- logliklihood = sum(gammaln_Nt_jk) - sum(gammaln_Nt_j)
- return logliklihood
+ loglikelihood = sum(gammaln_Nt_jk) - sum(gammaln_Nt_j)
+ return loglikelihood
def fn_logprior(d_t, R_t, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len):
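
fn_loglikelihood above is the Dirichlet-multinomial marginal likelihood (up to terms absorbed elsewhere in the posterior), where N_t[j, k] counts points captured by antecedent j with label k. A toy check of the same gammaln arithmetic, with hypothetical counts:

    import numpy as np
    from scipy.special import gammaln

    N_t = np.array([[3., 1.], [0., 4.]])   # hypothetical rule-by-label counts
    alpha = 1.0                             # symmetric Dirichlet prior
    loglik = gammaln(N_t + alpha).sum() - gammaln((N_t + alpha).sum(axis=1)).sum()
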
@@ -684,8 +684,8 @@
minrulesize = int(floor(avgrulesize))
maxrulesize = int(ceil(avgrulesize))
# Run through all perms again
- likelihds = []
+ likelihoods = []
d_ts = []
beta_Z, logalpha_pmf, logbeta_pmf = prior_calculations(lbda, len(X), eta,
maxlhs) # get the constants needed to compute the prior
@@ -878,11 +878,11 @@
Functions
# Compute the likelihood
R_t = d_t.index(0)
N_t = compute_rule_usage(d_t, R_t, X, Y)
- likelihds.append(
+ likelihoods.append(
fn_logposterior(d_t, R_t, N_t, alpha, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen,
lhs_len))
- likelihds = array(likelihds)
- d_star = d_ts[likelihds.argmax()]
+ likelihoods = array(likelihoods)
+ d_star = d_ts[likelihoods.argmax()]
except RuntimeWarning:
# This can happen if all perms are identically [0], or if no soln is found within the len and width bounds (probably the chains didn't converge)
print('No suitable point estimate found')
@@ -1130,7 +1130,7 @@
Functions
move_probs = array(move_probs_default)
Jratios = array([1., move_probs[2] / float(move_probs[1]), move_probs[1] / float(move_probs[2])])
u = random.random()
- # First we will find the indicies for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location
+ # First we will find the indices for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location
if u < sum(move_probs[:1]):
# This is an on-list move.
step = 'move'
@@ -1246,7 +1246,7 @@
diff --git a/docs/rule_set/brs.html b/docs/rule_set/brs.html
index cb0e1b1d..a3aebffd 100644
--- a/docs/rule_set/brs.html
+++ b/docs/rule_set/brs.html
@@ -256,9 +256,9 @@
self.pattern_space[k] = self.pattern_space[k] + tmp
def _generate_rules(self, X, y, verbose):
- '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy
- there are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generates rules much faster than randomforest.
- If maxlen is big, fpgrowh tends to generate too many rules that overflow the memories.
+ '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy.
+ There are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generate rules much faster than randomforest.
+ If maxlen is big, fpgrowth tends to generate too many rules that overflow the memory.
'''
df = 1 - X # df has negative associations
@@ -845,9 +845,9 @@
Params
self.pattern_space[k] = self.pattern_space[k] + tmp
def _generate_rules(self, X, y, verbose):
- '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy
- there are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generates rules much faster than randomforest.
- If maxlen is big, fpgrowh tends to generate too many rules that overflow the memories.
+ '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy.
+ There are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generate rules much faster than randomforest.
+ If maxlen is big, fpgrowth tends to generate too many rules that overflow the memory.
'''
df = 1 - X # df has negative associations
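
For the rule-generation tradeoff described in the docstring above, a minimal sketch of mining candidate antecedents with fpgrowth — shown here via mlxtend as an assumption for illustration; imodels' actual dependency may differ:

    import pandas as pd
    from mlxtend.frequent_patterns import fpgrowth

    # toy one-hot data; df = 1 - X in the code above adds negated features
    X = pd.DataFrame({'f1': [1, 0, 1, 1], 'f2': [0, 1, 1, 0]}).astype(bool)
    itemsets = fpgrowth(X, min_support=0.3, max_len=3, use_colnames=True)
    # a small max_len keeps the itemset count tractable, per the docstring
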
diff --git a/docs/rule_set/slipper_util.html b/docs/rule_set/slipper_util.html
index 8744ffb0..41b5cc27 100644
--- a/docs/rule_set/slipper_util.html
+++ b/docs/rule_set/slipper_util.html
@@ -60,7 +60,7 @@
def _condition_classify(self, X, condition):
"""
- Helper funciton to make classificaitons for a condition
+ Helper function to make classifications for a condition
in a rule
"""
@@ -88,7 +88,7 @@
return preds
def _get_design_matrices(self, X, y, rule):
- """ produce design matrices used in most equaitons"""
+ """ produce design matrices used in most equations"""
preds = self._rule_predict(X, rule)
W_plus_idx = np.where((preds == 1) & (y == 1))
@@ -113,7 +113,7 @@
def _grow_rule(self, X, y):
""" Starts with empty conjunction of conditions and
- greedily adds rules to mazimize Z_tilda
+ greedily adds rules to maximize Z_tilde
"""
stop_condition = False
@@ -137,11 +137,11 @@
for A_c in pivots
]
- # get max Z_tilda and update candidate accordingly
- tildas = [self._grow_rule_obj(X, y, r) for r in feature_candidates]
- if max(tildas) > self._grow_rule_obj(X, y, candidate_rule):
+ # get max Z_tilde and update candidate accordingly
+ tildes = [self._grow_rule_obj(X, y, r) for r in feature_candidates]
+ if max(tildes) > self._grow_rule_obj(X, y, candidate_rule):
candidate_rule = feature_candidates[
- tildas.index(max(tildas))
+ tildes.index(max(tildes))
]
preds = self._rule_predict(X, candidate_rule)
@@ -249,7 +249,7 @@
def _set_rule_or_default(self, X, y, learned_rule):
"""
Compare output of eq 5 between learned rule and default rule
- return rule that minmizes eq 5
+ return rule that minimizes eq 5
"""
rules = [self._make_default_rule(X, y), learned_rule]
@@ -356,7 +356,7 @@
Classes
def _condition_classify(self, X, condition):
"""
- Helper funciton to make classificaitons for a condition
+ Helper function to make classifications for a condition
in a rule
"""
@@ -384,7 +384,7 @@
Classes
return preds
def _get_design_matrices(self, X, y, rule):
- """ produce design matrices used in most equaitons"""
+ """ produce design matrices used in most equations"""
preds = self._rule_predict(X, rule)
W_plus_idx = np.where((preds == 1) & (y == 1))
@@ -409,7 +409,7 @@
Classes
def _grow_rule(self, X, y):
""" Starts with empty conjunction of conditions and
- greedily adds rules to mazimize Z_tilda
+ greedily adds rules to maximize Z_tilde
"""
stop_condition = False
@@ -433,11 +433,11 @@
Classes
for A_c in pivots
]
- # get max Z_tilda and update candidate accordingly
- tildas = [self._grow_rule_obj(X, y, r) for r in feature_candidates]
- if max(tildas) > self._grow_rule_obj(X, y, candidate_rule):
+ # get max Z_tilde and update candidate accordingly
+ tildes = [self._grow_rule_obj(X, y, r) for r in feature_candidates]
+ if max(tildes) > self._grow_rule_obj(X, y, candidate_rule):
candidate_rule = feature_candidates[
- tildas.index(max(tildas))
+ tildes.index(max(tildes))
]
preds = self._rule_predict(X, candidate_rule)
@@ -545,7 +545,7 @@
Classes
def _set_rule_or_default(self, X, y, learned_rule):
"""
Compare output of eq 5 between learned rule and default rule
- return rule that minmizes eq 5
+ return rule that minimizes eq 5
"""
rules = [self._make_default_rule(X, y), learned_rule]
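
The Z_tilde objective maximized by _grow_rule above is, in the SLIPPER paper (Cohen & Singer, 1999), the square root of the captured positive weight minus the square root of the captured negative weight — stated here as a hedged reading of the paper, not of slipper_util internals:

    import numpy as np

    def z_tilde(W_plus, W_minus):
        # W_plus / W_minus: total weight of positive / negative examples
        # captured by the candidate rule
        return np.sqrt(W_plus) - np.sqrt(W_minus)
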
diff --git a/docs/tree/cart_ccp.html b/docs/tree/cart_ccp.html
index 73791993..77db6a62 100644
--- a/docs/tree/cart_ccp.html
+++ b/docs/tree/cart_ccp.html
@@ -819,7 +819,7 @@
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
diff --git a/docs/tree/gosdt/pygosdt.html b/docs/tree/gosdt/pygosdt.html
index 73d608a5..9975695c 100644
--- a/docs/tree/gosdt/pygosdt.html
+++ b/docs/tree/gosdt/pygosdt.html
@@ -212,7 +212,7 @@
Returns
---
- real number : the accuracy produced by applying this model overthe given dataset, with
+ real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
"""
validation.check_is_fitted(self)
@@ -532,7 +532,7 @@
Classes
Returns
---
- real number : the accuracy produced by applying this model overthe given dataset, with
+ real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
"""
validation.check_is_fitted(self)
@@ -1014,7 +1014,7 @@
Returns
Returns
-real number : the accuracy produced by applying this model overthe given dataset, with
+real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
@@ -1034,7 +1034,7 @@
Returns
Returns
---
- real number : the accuracy produced by applying this model overthe given dataset, with
+ real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
"""
validation.check_is_fitted(self)
diff --git a/docs/tree/gosdt/pygosdt_helper.html b/docs/tree/gosdt/pygosdt_helper.html
index cca61583..a15e9964 100644
--- a/docs/tree/gosdt/pygosdt_helper.html
+++ b/docs/tree/gosdt/pygosdt_helper.html
@@ -180,7 +180,7 @@
Returns
---
- array-like, shape = [n_sampels by 1] : a column where each element is the prediction
+ array-like, shape = [n_samples by 1] : a column where each element is the prediction
associated with each row
"""
# Perform an encoding if an encoding unit is specified
@@ -235,7 +235,7 @@
Returns
---
- real number : the inaccuracy produced by applying this model overthe given dataset, with
+ real number : the inaccuracy produced by applying this model over the given dataset, with
optionals for weighted inaccuracy
"""
return 1 - self.score(X, y, weight=weight)
@@ -253,7 +253,7 @@
Returns
---
- real number : the accuracy produced by applying this model overthe given dataset, with
+ real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
"""
y_hat = self.predict(X)
@@ -350,7 +350,7 @@
"""
Returns
---
- string : pseuodocode representing the logic of this classifier
+ string : pseudocode representing the logic of this classifier
"""
cases = []
for group in self.__groups__():
@@ -803,7 +803,7 @@
Methods
Returns
---
- array-like, shape = [n_sampels by 1] : a column where each element is the prediction
+ array-like, shape = [n_samples by 1] : a column where each element is the prediction
associated with each row
"""
# Perform an encoding if an encoding unit is specified
@@ -858,7 +858,7 @@
Methods
Returns
---
- real number : the inaccuracy produced by applying this model overthe given dataset, with
+ real number : the inaccuracy produced by applying this model over the given dataset, with
optionals for weighted inaccuracy
"""
return 1 - self.score(X, y, weight=weight)
@@ -876,7 +876,7 @@
Methods
Returns
---
- real number : the accuracy produced by applying this model overthe given dataset, with
+ real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
"""
y_hat = self.predict(X)
@@ -973,7 +973,7 @@
Methods
"""
Returns
---
- string : pseuodocode representing the logic of this classifier
+ string : pseudocode representing the logic of this classifier
"""
cases = []
for group in self.__groups__():
@@ -1251,7 +1251,7 @@
Returns
Returns
-real number : the inaccuracy produced by applying this model overthe given dataset, with
+real number : the inaccuracy produced by applying this model over the given dataset, with
optionals for weighted inaccuracy
@@ -1271,7 +1271,7 @@
Returns
Returns
---
- real number : the inaccuracy produced by applying this model overthe given dataset, with
+ real number : the inaccuracy produced by applying this model over the given dataset, with
optionals for weighted inaccuracy
"""
return 1 - self.score(X, y, weight=weight)
@@ -1510,7 +1510,7 @@
Parameters
Returns
-array-like, shape = [n_sampels by 1] : a column where each element is the prediction
+array-like, shape = [n_samples by 1] : a column where each element is the prediction
associated with each row
@@ -1531,7 +1531,7 @@
Returns
Returns
---
- array-like, shape = [n_sampels by 1] : a column where each element is the prediction
+ array-like, shape = [n_samples by 1] : a column where each element is the prediction
associated with each row
"""
# Perform an encoding if an encoding unit is specified
@@ -1561,7 +1561,7 @@
Returns
Returns
-real number : the accuracy produced by applying this model overthe given dataset, with
+real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
@@ -1581,7 +1581,7 @@
Returns
Returns
---
- real number : the accuracy produced by applying this model overthe given dataset, with
+ real number : the accuracy produced by applying this model over the given dataset, with
optionals for weighted accuracy
"""
y_hat = self.predict(X)
diff --git a/docs/tree/hierarchical_shrinkage.html b/docs/tree/hierarchical_shrinkage.html
index 02febe0e..0801d286 100644
--- a/docs/tree/hierarchical_shrinkage.html
+++ b/docs/tree/hierarchical_shrinkage.html
@@ -30,7 +30,8 @@
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
-from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, export_text
+from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, \
+ export_text
from sklearn.ensemble import GradientBoostingClassifier
from imodels.util import checks
@@ -50,7 +51,7 @@
Params
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
- Defaults to CART Classification Tree with 20 max leaf ndoes
+ Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
@@ -76,11 +77,11 @@
return {'reg_param': self.reg_param, 'estimator_': self.estimator_,
'shrinkage_scheme_': self.shrinkage_scheme_}
- def fit(self, X, y, *args, **kwargs):
+ def fit(self, X, y, sample_weight=None, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
feature_names = kwargs.pop('feature_names', None) # None returned if not passed
X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
- self.estimator_ = self.estimator_.fit(X, y, *args, **kwargs)
+ self.estimator_ = self.estimator_.fit(X, y, sample_weight, *args, **kwargs)
self._shrink()
# compute complexity
@@ -181,6 +182,27 @@
else:
return s + export_text(self.estimator_, show_weights=True)
+ def __repr__(self):
+ # s = self.__class__.__name__
+ # s += "("
+ # s += "estimator_="
+ # s += repr(self.estimator_)
+ # s += ", "
+ # s += "reg_param="
+ # s += str(self.reg_param)
+ # s += ", "
+ # s += "shrinkage_scheme_="
+ # s += self.shrinkage_scheme_
+ # s += ")"
+ # return s
+ attr_list = ["estimator_", "reg_param", "shrinkage_scheme_"]
+ s = self.__class__.__name__
+ s += "("
+ for attr in attr_list:
+ s += attr + "=" + repr(getattr(self, attr)) + ", "
+ s = s[:-2] + ")"
+ return s
+
class HSTreeRegressor(HSTree, RegressorMixin):
...
@@ -202,7 +224,7 @@
------
estimator_
Sklearn estimator (already initialized).
- If no estimator_ is passsed, sklearn decision tree is used
+ If no estimator_ is passed, sklearn decision tree is used
max_rules
If estimator is None, then max_leaf_nodes is passed to the default decision tree
@@ -232,6 +254,16 @@
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y, *args, **kwargs)
+ def __repr__(self):
+ attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_",
+ "cv", "scoring"]
+ s = self.__class__.__name__
+ s += "("
+ for attr in attr_list:
+ s += attr + "=" + repr(getattr(self, attr)) + ", "
+ s = s[:-2] + ")"
+ return s
+
class HSTreeRegressorCV(HSTreeRegressor):
def __init__(self, estimator_: BaseEstimator = None,
@@ -245,7 +277,7 @@
------
estimator_
Sklearn estimator (already initialized).
- If no estimator_ is passsed, sklearn decision tree is used
+ If no estimator_ is passed, sklearn decision tree is used
max_rules
If estimator is None, then max_leaf_nodes is passed to the default decision tree
@@ -275,6 +307,16 @@
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
super().fit(X=X, y=y, *args, **kwargs)
+ def __repr__(self):
+ attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_",
+ "cv", "scoring"]
+ s = self.__class__.__name__
+ s += "("
+ for attr in attr_list:
+ s += attr + "=" + repr(getattr(self, attr)) + ", "
+ s = s[:-2] + ")"
+ return s
+
if __name__ == '__main__':
np.random.seed(15)
@@ -338,7 +380,7 @@
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
@@ -363,7 +405,7 @@
Params
Params
------
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
- Defaults to CART Classification Tree with 20 max leaf ndoes
+ Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
@@ -389,11 +431,11 @@
Params
return {'reg_param': self.reg_param, 'estimator_': self.estimator_,
'shrinkage_scheme_': self.shrinkage_scheme_}
- def fit(self, X, y, *args, **kwargs):
+ def fit(self, X, y, sample_weight=None, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
feature_names = kwargs.pop('feature_names', None) # None returned if not passed
X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
- self.estimator_ = self.estimator_.fit(X, y, *args, **kwargs)
+ self.estimator_ = self.estimator_.fit(X, y, sample_weight, *args, **kwargs)
self._shrink()
# compute complexity
@@ -492,7 +534,28 @@
Params
if hasattr(self, 'feature_names') and self.feature_names is not None:
return s + export_text(self.estimator_, feature_names=self.feature_names, show_weights=True)
else:
- return s + export_text(self.estimator_, show_weights=True)
+ return s + export_text(self.estimator_, show_weights=True)
+
+ def __repr__(self):
+ # s = self.__class__.__name__
+ # s += "("
+ # s += "estimator_="
+ # s += repr(self.estimator_)
+ # s += ", "
+ # s += "reg_param="
+ # s += str(self.reg_param)
+ # s += ", "
+ # s += "shrinkage_scheme_="
+ # s += self.shrinkage_scheme_
+ # s += ")"
+ # return s
+ attr_list = ["estimator_", "reg_param", "shrinkage_scheme_"]
+ s = self.__class__.__name__
+ s += "("
+ for attr in attr_list:
+ s += attr + "=" + repr(getattr(self, attr)) + ", "
+ s = s[:-2] + ")"
+ return s
Subclasses
@@ -502,7 +565,7 @@
Subclasses
Methods
-def fit(self, X, y, *args, **kwargs)
+def fit(self, X, y, sample_weight=None, *args, **kwargs)
@@ -510,11 +573,11 @@
Methods
Expand source code
-def fit(self, X, y, *args, **kwargs):
+def fit(self, X, y, sample_weight=None, *args, **kwargs):
# remove feature_names if it exists (note: only works as keyword-arg)
feature_names = kwargs.pop('feature_names', None) # None returned if not passed
X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
- self.estimator_ = self.estimator_.fit(X, y, *args, **kwargs)
+ self.estimator_ = self.estimator_.fit(X, y, sample_weight, *args, **kwargs)
self._shrink()
# compute complexity
@@ -608,7 +671,7 @@
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
@@ -645,7 +708,7 @@
Subclasses
Params
estimator_
Sklearn estimator (already initialized).
-If no estimator_ is passsed, sklearn decision tree is used
+If no estimator_ is passed, sklearn decision tree is used
max_rules
If estimator is None, then max_leaf_nodes is passed to the default decision tree
args, kwargs
@@ -666,7 +729,7 @@
Params
------
estimator_
Sklearn estimator (already initialized).
- If no estimator_ is passsed, sklearn decision tree is used
+ If no estimator_ is passed, sklearn decision tree is used
max_rules
If estimator is None, then max_leaf_nodes is passed to the default decision tree
@@ -694,7 +757,17 @@
estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting)
-Defaults to CART Classification Tree with 20 max leaf ndoes
+Defaults to CART Classification Tree with 20 max leaf nodes
Note: this estimator will be directly modified
reg_param: float
Higher is more regularization (can be arbitrarily large, should not be < 0)
@@ -775,7 +848,7 @@
Subclasses
Params
estimator_
Sklearn estimator (already initialized).
-If no estimator_ is passsed, sklearn decision tree is used
+If no estimator_ is passed, sklearn decision tree is used
max_rules
If estimator is None, then max_leaf_nodes is passed to the default decision tree
args, kwargs
@@ -796,7 +869,7 @@
Params
------
estimator_
Sklearn estimator (already initialized).
- If no estimator_ is passsed, sklearn decision tree is used
+ If no estimator_ is passed, sklearn decision tree is used
max_rules
If estimator is None, then max_leaf_nodes is passed to the default decision tree
@@ -824,7 +897,17 @@
Params
cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring)
self.scores_.append(np.mean(cv_scores))
self.reg_param = self.reg_param_list[np.argmax(self.scores_)]
- super().fit(X=X, y=y, *args, **kwargs)
+ super().fit(X=X, y=y, *args, **kwargs)
+
+ def __repr__(self):
+ attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_",
+ "cv", "scoring"]
+ s = self.__class__.__name__
+ s += "("
+ for attr in attr_list:
+ s += attr + "=" + repr(getattr(self, attr)) + ", "
+ s = s[:-2] + ")"
+ return s
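
Finally, a hedged usage sketch of the CV variant touched by this patch, assuming the top-level imodels exports; the reg_param_list values below are illustrative:

    from sklearn.datasets import make_classification
    from imodels import HSTreeClassifierCV

    X, y = make_classification(n_samples=200, random_state=0)
    model = HSTreeClassifierCV(reg_param_list=[0.1, 1, 10, 50])
    model.fit(X, y)   # cross-validates reg_param, then refits with the best
    print(model)      # exercises the __repr__ added in this patch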