From c18078ecb919597bdaaae0ca4d521093274f02f2 Mon Sep 17 00:00:00 2001 From: Chandan Singh Date: Tue, 3 Jan 2023 11:34:32 -0500 Subject: [PATCH] docs reupdate --- docs/discretization/discretizer.html | 20 +- docs/discretization/index.html | 2 +- docs/discretization/mdlp.html | 8 +- docs/experimental/bartpy/data.html | 4 +- .../bartpy/diagnostics/diagnostics.html | 14 +- .../bartpy/diagnostics/motivation.html | 4 +- .../bartpy/diagnostics/residuals.html | 10 +- .../bartpy/diagnostics/sampling.html | 10 +- .../initializers/sklearntreeinitializer.html | 22 +- docs/experimental/bartpy/model.html | 8 +- .../bartpy/samplers/leafnode.html | 30 +- .../bartpy/samplers/oblivioustrees/index.html | 4 +- .../oblivioustrees/likelihoodratio.html | 562 ++++++++++++++ .../samplers/oblivioustrees/treemutation.html | 52 +- .../bartpy/samplers/treemutation.html | 126 +-- .../samplers/unconstrainedtree/index.html | 4 +- .../unconstrainedtree/likelihoodratio.html | 721 ++++++++++++++++++ .../unconstrainedtree/treemutation.html | 52 +- docs/experimental/bartpy/sklearnmodel.html | 12 +- docs/experimental/figs_ensembles.html | 4 +- .../bayesian_rule_list/brl_util.html | 48 +- docs/rule_set/brs.html | 12 +- docs/rule_set/slipper_util.html | 32 +- docs/tree/cart_ccp.html | 4 +- docs/tree/gosdt/pygosdt.html | 8 +- docs/tree/gosdt/pygosdt_helper.html | 28 +- docs/tree/hierarchical_shrinkage.html | 127 ++- docs/util/neural_nets.html | 4 +- 28 files changed, 1649 insertions(+), 283 deletions(-) create mode 100644 docs/experimental/bartpy/samplers/oblivioustrees/likelihoodratio.html create mode 100644 docs/experimental/bartpy/samplers/unconstrainedtree/likelihoodratio.html diff --git a/docs/discretization/discretizer.html b/docs/discretization/discretizer.html index 40426bb1..36f0bec7 100644 --- a/docs/discretization/discretizer.html +++ b/docs/discretization/discretizer.html @@ -183,7 +183,7 @@ max values for the range of x keep_pointwise_bins : boolean - If True, treat duplicate bin_edges as a 
pointiwse bin, + If True, treat duplicate bin_edges as a pointwise bin, i.e., [a, a]. If False, these bins are in effect ignored. Returns @@ -480,7 +480,7 @@ manual_discretizer_ : dictionary Provides bin_edges to feed into _quantile_discretization() - and do quantile discreization manually for features where + and do quantile discretization manually for features where KBinsDiscretizer() failed. Ignored if strategy != 'quantile' or no errors in KBinsDiscretizer(). @@ -515,7 +515,7 @@ self """ - # initalization and error checking + # initialization and error checking self._fit_preprocessing(X) # apply KBinsDiscretizer to the selected columns @@ -734,7 +734,7 @@ Parameters ---------- - X : data frame of shape (n_samples, n_fatures) + X : data frame of shape (n_samples, n_features) Training data used to fit RF y : array-like of shape (n_samples,) @@ -1119,7 +1119,7 @@

Params

max values for the range of x keep_pointwise_bins : boolean - If True, treat duplicate bin_edges as a pointiwse bin, + If True, treat duplicate bin_edges as a pointwise bin, i.e., [a, a]. If False, these bins are in effect ignored. Returns @@ -1287,7 +1287,7 @@

Attributes

Primary discretization method used to bin numeric data
manual_discretizer_ : dictionary
Provides bin_edges to feed into _quantile_discretization() -and do quantile discreization manually for features where +and do quantile discretization manually for features where KBinsDiscretizer() failed. Ignored if strategy != 'quantile' or no errors in KBinsDiscretizer().
onehot_ : object of class OneHotEncoder()
@@ -1352,7 +1352,7 @@

Examples

manual_discretizer_ : dictionary Provides bin_edges to feed into _quantile_discretization() - and do quantile discreization manually for features where + and do quantile discretization manually for features where KBinsDiscretizer() failed. Ignored if strategy != 'quantile' or no errors in KBinsDiscretizer(). @@ -1387,7 +1387,7 @@

Examples

self """ - # initalization and error checking + # initialization and error checking self._fit_preprocessing(X) # apply KBinsDiscretizer to the selected columns @@ -1509,7 +1509,7 @@

Returns

self """ - # initalization and error checking + # initialization and error checking self._fit_preprocessing(X) # apply KBinsDiscretizer to the selected columns @@ -2135,7 +2135,7 @@

Attributes

Parameters ---------- - X : data frame of shape (n_samples, n_fatures) + X : data frame of shape (n_samples, n_features) Training data used to fit RF y : array-like of shape (n_samples,) diff --git a/docs/discretization/index.html b/docs/discretization/index.html index 6ee76721..a4fbbaec 100644 --- a/docs/discretization/index.html +++ b/docs/discretization/index.html @@ -36,7 +36,7 @@

Sub-modules

imodels.discretization.mdlp

Discretization MDLP -Python implementation of Fayyad and Irani's MDLP criterion discretiation algorithm …

+Python implementation of Fayyad and Irani's MDLP criterion discretization algorithm …

imodels.discretization.simple
diff --git a/docs/discretization/mdlp.html b/docs/discretization/mdlp.html index 37795d19..fd99e112 100644 --- a/docs/discretization/mdlp.html +++ b/docs/discretization/mdlp.html @@ -18,7 +18,7 @@

Discretization MDLP

-

Python implementation of Fayyad and Irani's MDLP criterion discretiation algorithm

+

Python implementation of Fayyad and Irani's MDLP criterion discretization algorithm

Reference: Irani, Keki B. "Multi-interval discretization of continuous-valued attributes for classification learning." (1993).

@@ -27,7 +27,7 @@

Discretization MDLP

'''
 # Discretization MDLP
-Python implementation of Fayyad and Irani's MDLP criterion discretiation algorithm
+Python implementation of Fayyad and Irani's MDLP criterion discretization algorithm
 
 **Reference:**
 Irani, Keki B. "Multi-interval discretization of continuous-valued attributes for classification learning." (1993).
@@ -138,7 +138,7 @@ 

Discretization MDLP

''' Given an attribute, find all potential cut_points (boundary points) :param feature: feature of interest - :param partition_index: indices of rows for which feature value falls whithin interval of interest + :param partition_index: indices of rows for which feature value falls within interval of interest :return: array with potential cut_points ''' # get dataframe with only rows of interest, and feature and class columns @@ -839,7 +839,7 @@

Params

''' Given an attribute, find all potential cut_points (boundary points) :param feature: feature of interest - :param partition_index: indices of rows for which feature value falls whithin interval of interest + :param partition_index: indices of rows for which feature value falls within interval of interest :return: array with potential cut_points ''' # get dataframe with only rows of interest, and feature and class columns diff --git a/docs/experimental/bartpy/data.html b/docs/experimental/bartpy/data.html index 3f236b37..f94693a0 100644 --- a/docs/experimental/bartpy/data.html +++ b/docs/experimental/bartpy/data.html @@ -99,7 +99,7 @@ self._n_features = X.shape[1] self._mask = mask - # Cache iniialization + # Cache initialization if unique_columns is not None: self._unique_columns = [x if x is True else None for x in unique_columns] else: @@ -524,7 +524,7 @@

Classes

self._n_features = X.shape[1] self._mask = mask - # Cache iniialization + # Cache initialization if unique_columns is not None: self._unique_columns = [x if x is True else None for x in unique_columns] else: diff --git a/docs/experimental/bartpy/diagnostics/diagnostics.html b/docs/experimental/bartpy/diagnostics/diagnostics.html index 3def619c..3d21593b 100644 --- a/docs/experimental/bartpy/diagnostics/diagnostics.html +++ b/docs/experimental/bartpy/diagnostics/diagnostics.html @@ -30,8 +30,8 @@ from sklearn.metrics import mean_squared_error from imodels.util.tree_interaction_utils import get_interacting_features -from ..diagnostics.residuals import plot_qq, plot_homoskedasity_diagnostics -from ..diagnostics.sampling import plot_tree_mutation_acceptance_rate, plot_tree_likelihhod, plot_tree_probs +from ..diagnostics.residuals import plot_qq, plot_homoscedasticity_diagnostics +from ..diagnostics.sampling import plot_tree_mutation_acceptance_rate, plot_tree_likelihood, plot_tree_probs from ..diagnostics.sigma import plot_sigma_convergence from ..diagnostics.trees import plot_tree_depth from ..initializers.sklearntreeinitializer import SklearnTreeInitializer @@ -44,9 +44,9 @@ plot_qq(model, ax1) plot_tree_depth(model, ax2) plot_sigma_convergence(model, ax3) - plot_homoskedasity_diagnostics(model, ax4) + plot_homoscedasticity_diagnostics(model, ax4) plot_tree_mutation_acceptance_rate(model, ax5) - # plot_tree_likelihhod(model, ax6) + # plot_tree_likelihood(model, ax6) # plot_tree_probs(model, ax7) plt.show() @@ -108,7 +108,7 @@ # plot_tree_depth(bart_figs, ax2, # f"FIGS initialization (MSE: {np.round(mean_squared_error(bart_figs_preds, y_test), 4)}" # f", FIGS MSE: {np.round(mean_squared_error(figs_preds, y_test), 2)})", x_label=True) - # plt.title(f"Bayesian tree with different initilization of Friedman 1 dataset n={n}") + # plt.title(f"Bayesian tree with different initialization of Friedman 1 dataset n={n}") plt.show() @@ -138,9 +138,9 @@

Functions

plot_qq(model, ax1) plot_tree_depth(model, ax2) plot_sigma_convergence(model, ax3) - plot_homoskedasity_diagnostics(model, ax4) + plot_homoscedasticity_diagnostics(model, ax4) plot_tree_mutation_acceptance_rate(model, ax5) - # plot_tree_likelihhod(model, ax6) + # plot_tree_likelihood(model, ax6) # plot_tree_probs(model, ax7) plt.show()
diff --git a/docs/experimental/bartpy/diagnostics/motivation.html b/docs/experimental/bartpy/diagnostics/motivation.html index 0656a4d9..385cba51 100644 --- a/docs/experimental/bartpy/diagnostics/motivation.html +++ b/docs/experimental/bartpy/diagnostics/motivation.html @@ -141,7 +141,7 @@ for c in range(n_chains): clr = next(color) - chain_preds = model.chain_precitions(X, c) + chain_preds = model.chain_predictions(X, c) mean_pred = np.array(chain_preds).mean(axis=0) y_plt = [mean_squared_error(mean_pred, p) for p in chain_preds] @@ -445,7 +445,7 @@

Functions

for c in range(n_chains): clr = next(color) - chain_preds = model.chain_precitions(X, c) + chain_preds = model.chain_predictions(X, c) mean_pred = np.array(chain_preds).mean(axis=0) y_plt = [mean_squared_error(mean_pred, p) for p in chain_preds] diff --git a/docs/experimental/bartpy/diagnostics/residuals.html b/docs/experimental/bartpy/diagnostics/residuals.html index 1444b886..97106c0b 100644 --- a/docs/experimental/bartpy/diagnostics/residuals.html +++ b/docs/experimental/bartpy/diagnostics/residuals.html @@ -37,7 +37,7 @@ return ax -def plot_homoskedasity_diagnostics(model: SklearnModel, ax=None): +def plot_homoscedasticity_diagnostics(model: SklearnModel, ax=None): if ax is None: _, ax = plt.subplots(1, 1, figsize=(5, 5)) sns.regplot(model.predict(model.data.X.values), model.residuals(model.data.X.values), ax=ax) @@ -54,8 +54,8 @@

Functions

-
-def plot_homoskedasity_diagnostics(model: SklearnModel, ax=None) +
+def plot_homoscedasticity_diagnostics(model: SklearnModel, ax=None)
@@ -63,7 +63,7 @@

Functions

Expand source code -
def plot_homoskedasity_diagnostics(model: SklearnModel, ax=None):
+
def plot_homoscedasticity_diagnostics(model: SklearnModel, ax=None):
     if ax is None:
         _, ax = plt.subplots(1, 1, figsize=(5, 5))
     sns.regplot(model.predict(model.data.X.values), model.residuals(model.data.X.values), ax=ax)
@@ -109,7 +109,7 @@ 

Index 🔍

  • Functions

  • diff --git a/docs/experimental/bartpy/diagnostics/sampling.html b/docs/experimental/bartpy/diagnostics/sampling.html index 5cd7cd4a..7d1617ff 100644 --- a/docs/experimental/bartpy/diagnostics/sampling.html +++ b/docs/experimental/bartpy/diagnostics/sampling.html @@ -38,7 +38,7 @@ ax.set_ylim((0, 1.1)) return ax -def plot_tree_likelihhod(model: SklearnModel, ax=None): +def plot_tree_likelihood(model: SklearnModel, ax=None): if ax is None: fig, ax = plt.subplots(1, 1) @@ -70,8 +70,8 @@

    Functions

    -
    -def plot_tree_likelihhod(model: SklearnModel, ax=None) +
    +def plot_tree_likelihood(model: SklearnModel, ax=None)
    @@ -79,7 +79,7 @@

    Functions

    Expand source code -
    def plot_tree_likelihhod(model: SklearnModel, ax=None):
    +
    def plot_tree_likelihood(model: SklearnModel, ax=None):
         if ax is None:
             fig, ax = plt.subplots(1, 1)
     
    @@ -152,7 +152,7 @@ 

    Index 🔍

  • Functions

    diff --git a/docs/experimental/bartpy/initializers/sklearntreeinitializer.html b/docs/experimental/bartpy/initializers/sklearntreeinitializer.html index 5a26cb7e..0d9f6523 100644 --- a/docs/experimental/bartpy/initializers/sklearntreeinitializer.html +++ b/docs/experimental/bartpy/initializers/sklearntreeinitializer.html @@ -102,14 +102,14 @@ return -def enumarate_tree(tree: Node, num_iter=iter(range(int(1e+06)))): +def enumerate_tree(tree: Node, num_iter=iter(range(int(1e+06)))): if tree is None: return tree.number = next(num_iter) # if hasattr(tree, 'left'): - enumarate_tree(get_child(tree, 'left'), num_iter) + enumerate_tree(get_child(tree, 'left'), num_iter) # if hasattr(tree, 'right'): - enumarate_tree(get_child(tree, 'right'), num_iter) + enumerate_tree(get_child(tree, 'right'), num_iter) def fill_nodes_dict(tree: Node, node_dict: dict): @@ -125,7 +125,7 @@ class SkTree: def __init__(self, tree: Node): nodes_dict = {} - enumarate_tree(tree, num_iter=iter(range(int(1e+06)))) + enumerate_tree(tree, num_iter=iter(range(int(1e+06)))) fill_nodes_dict(tree, nodes_dict) self.children_left = [] self.children_right = [] @@ -231,8 +231,8 @@

    Functions

    -
    -def enumarate_tree(tree: Node, num_iter=<range_iterator object>) +
    +def enumerate_tree(tree: Node, num_iter=<range_iterator object>)
    @@ -240,14 +240,14 @@

    Functions

    Expand source code -
    def enumarate_tree(tree: Node, num_iter=iter(range(int(1e+06)))):
    +
    def enumerate_tree(tree: Node, num_iter=iter(range(int(1e+06)))):
         if tree is None:
             return
         tree.number = next(num_iter)
         # if hasattr(tree, 'left'):
    -    enumarate_tree(get_child(tree, 'left'), num_iter)
    +    enumerate_tree(get_child(tree, 'left'), num_iter)
         # if hasattr(tree, 'right'):
    -    enumarate_tree(get_child(tree, 'right'), num_iter)
    + enumerate_tree(get_child(tree, 'right'), num_iter)
  • @@ -427,7 +427,7 @@

    Classes

    class SkTree:
         def __init__(self, tree: Node):
             nodes_dict = {}
    -        enumarate_tree(tree, num_iter=iter(range(int(1e+06))))
    +        enumerate_tree(tree, num_iter=iter(range(int(1e+06))))
             fill_nodes_dict(tree, nodes_dict)
             self.children_left = []
             self.children_right = []
    @@ -580,7 +580,7 @@ 

    Index 🔍

  • Functions

      -
    • enumarate_tree
    • +
    • enumerate_tree
    • fill_nodes_dict
    • get_bartpy_tree_from_sklearn
    • get_child
    • diff --git a/docs/experimental/bartpy/model.html b/docs/experimental/bartpy/model.html index 36c9a7fa..33830337 100644 --- a/docs/experimental/bartpy/model.html +++ b/docs/experimental/bartpy/model.html @@ -56,7 +56,7 @@ self._sigma = sigma self._prediction = None self._initializer = initializer - self._check_initilizer() + self._check_initializer() self.classification = classification if trees is None: @@ -77,7 +77,7 @@ self.n_trees = len(trees) self._trees = trees - def _check_initilizer(self): + def _check_initializer(self): if not hasattr(self._initializer, "_tree"): return elif self._initializer._tree is None: @@ -203,7 +203,7 @@

      Classes

      self._sigma = sigma self._prediction = None self._initializer = initializer - self._check_initilizer() + self._check_initializer() self.classification = classification if trees is None: @@ -224,7 +224,7 @@

      Classes

      self.n_trees = len(trees) self._trees = trees - def _check_initilizer(self): + def _check_initializer(self): if not hasattr(self._initializer, "_tree"): return elif self._initializer._tree is None: diff --git a/docs/experimental/bartpy/samplers/leafnode.html b/docs/experimental/bartpy/samplers/leafnode.html index ee8cd137..5ce08622 100644 --- a/docs/experimental/bartpy/samplers/leafnode.html +++ b/docs/experimental/bartpy/samplers/leafnode.html @@ -49,11 +49,11 @@ def sample(self, model: Model, node: LeafNode) -> float: prior_var = model.sigma_m ** 2 n = node.data.X.n_obsv - likihood_var = (model.sigma.current_value() ** 2) / n - likihood_mean = node.data.y.summed_y() / n - node.set_mean_response(likihood_mean) - posterior_variance = 1. / (1. / prior_var + 1. / likihood_var) - posterior_mean = likihood_mean * 1#(prior_var / (likihood_var + prior_var)) + likelihood_var = (model.sigma.current_value() ** 2) / n + likelihood_mean = node.data.y.summed_y() / n + node.set_mean_response(likelihood_mean) + posterior_variance = 1. / (1. / prior_var + 1. / likelihood_var) + posterior_mean = likelihood_mean * 1#(prior_var / (likelihood_var + prior_var)) val = posterior_mean# + (self._scalar_sampler.sample() * np.power(posterior_variance / model.n_trees, 0.5)) return val @@ -112,11 +112,11 @@

      Classes

      def sample(self, model: Model, node: LeafNode) -> float: prior_var = model.sigma_m ** 2 n = node.data.X.n_obsv - likihood_var = (model.sigma.current_value() ** 2) / n - likihood_mean = node.data.y.summed_y() / n - node.set_mean_response(likihood_mean) - posterior_variance = 1. / (1. / prior_var + 1. / likihood_var) - posterior_mean = likihood_mean * 1#(prior_var / (likihood_var + prior_var)) + likelihood_var = (model.sigma.current_value() ** 2) / n + likelihood_mean = node.data.y.summed_y() / n + node.set_mean_response(likelihood_mean) + posterior_variance = 1. / (1. / prior_var + 1. / likelihood_var) + posterior_mean = likelihood_mean * 1#(prior_var / (likelihood_var + prior_var)) val = posterior_mean# + (self._scalar_sampler.sample() * np.power(posterior_variance / model.n_trees, 0.5)) return val
  • @@ -139,11 +139,11 @@

    Methods

    def sample(self, model: Model, node: LeafNode) -> float:
         prior_var = model.sigma_m ** 2
         n = node.data.X.n_obsv
    -    likihood_var = (model.sigma.current_value() ** 2) / n
    -    likihood_mean = node.data.y.summed_y() / n
    -    node.set_mean_response(likihood_mean)
    -    posterior_variance = 1. / (1. / prior_var + 1. / likihood_var)
    -    posterior_mean = likihood_mean * 1#(prior_var / (likihood_var + prior_var))
    +    likelihood_var = (model.sigma.current_value() ** 2) / n
    +    likelihood_mean = node.data.y.summed_y() / n
    +    node.set_mean_response(likelihood_mean)
    +    posterior_variance = 1. / (1. / prior_var + 1. / likelihood_var)
    +    posterior_mean = likelihood_mean * 1#(prior_var / (likelihood_var + prior_var))
         val = posterior_mean# + (self._scalar_sampler.sample() * np.power(posterior_variance / model.n_trees, 0.5))
         return val
    diff --git a/docs/experimental/bartpy/samplers/oblivioustrees/index.html b/docs/experimental/bartpy/samplers/oblivioustrees/index.html index 6764a903..32e9284a 100644 --- a/docs/experimental/bartpy/samplers/oblivioustrees/index.html +++ b/docs/experimental/bartpy/samplers/oblivioustrees/index.html @@ -21,7 +21,7 @@

    Sub-modules

    -
    imodels.experimental.bartpy.samplers.oblivioustrees.likihoodratio
    +
    imodels.experimental.bartpy.samplers.oblivioustrees.likelihoodratio
    @@ -61,7 +61,7 @@

    Index 🔍

  • Sub-modules

    diff --git a/docs/experimental/bartpy/samplers/oblivioustrees/likelihoodratio.html b/docs/experimental/bartpy/samplers/oblivioustrees/likelihoodratio.html new file mode 100644 index 00000000..5d094bcc --- /dev/null +++ b/docs/experimental/bartpy/samplers/oblivioustrees/likelihoodratio.html @@ -0,0 +1,562 @@ + + + + + + + + + + + + + + + + +
    +
    +
    +
    + +Expand source code + +
    from typing import List
    +
    +import numpy as np
    +
    +from ...model import Model
    +from ...mutation import TreeMutation, GrowMutation, PruneMutation
    +from ...node import LeafNode, TreeNode
    +from ...samplers.treemutation import TreeMutationLikelihoodRatio
    +from ...sigma import Sigma
    +from ...tree import Tree
    +
    +
    +def log_grow_ratio(combined_node: LeafNode, left_node: LeafNode, right_node: LeafNode, sigma: Sigma, sigma_mu: float):
    +    var = np.power(sigma.current_value(), 2)
    +    var_mu = np.power(sigma_mu, 2)
    +    n = combined_node.data.X.n_obsv
    +    n_l = left_node.data.X.n_obsv
    +    n_r = right_node.data.X.n_obsv
    +
    +    first_term = (var * (var + n * sigma_mu)) / ((var + n_l * var_mu) * (var + n_r * var_mu))
    +    first_term = np.log(np.sqrt(first_term))
    +
    +    left_resp_contribution = np.square(left_node.data.y.summed_y()) / (var + n_l * sigma_mu)
    +    right_resp_contribution = np.square(right_node.data.y.summed_y()) / (var + n_r * sigma_mu)
    +    combined_resp_contribution = np.square(combined_node.data.y.summed_y()) / (var + n * sigma_mu)
    +
    +    resp_contribution = left_resp_contribution + right_resp_contribution - combined_resp_contribution
    +
    +    return first_term + ((var_mu / (2 * var)) * resp_contribution)
    +
    +
    +class UniformTreeMutationLikelihoodRatio(TreeMutationLikelihoodRatio):
    +
    +    def __init__(self,
    +                 prob_method: List[float]=None):
    +        if prob_method is None:
    +            prob_method = [0.5, 0.5]
    +        self.prob_method = prob_method
    +
    +    def log_transition_ratio(self, tree: Tree, mutation: TreeMutation):
    +        if mutation.kind == "prune":
    +            mutation: PruneMutation = mutation
    +            return self.log_prune_transition_ratio(tree, mutation)
    +        if mutation.kind == "grow":
    +            mutation: GrowMutation = mutation
    +            return self.log_grow_transition_ratio(tree, mutation)
    +        else:
    +            raise NotImplementedError("kind {} not supported".format(mutation.kind))
    +
    +    def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
    +        if mutation.kind == "grow":
    +            mutation: GrowMutation = mutation
    +            return self.log_tree_ratio_grow(model, tree, mutation)
    +        if mutation.kind == "prune":
    +            mutation: PruneMutation = mutation
    +            return self.log_tree_ratio_prune(model, mutation)
    +
    +    def log_likelihood_ratio(self, model: Model, tree: Tree, proposal: TreeMutation):
    +        if proposal.kind == "grow":
    +            proposal: GrowMutation = proposal
    +            log_lik = self.log_likelihood_ratio_grow(model, proposal)
    +        elif proposal.kind == "prune":
    +            proposal: PruneMutation = proposal
    +            log_lik = self.log_likelihood_ratio_prune(model, proposal)
    +        #else:
    +        #    raise NotImplementedError("Only prune and grow mutations supported")
    +        if type(log_lik) == np.ma.core.MaskedConstant:
    +            return -np.inf
    +        return log_lik
    +
    +    @staticmethod
    +    def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation):
    +        return log_grow_ratio(proposal.existing_node, proposal.updated_node.left_child, proposal.updated_node.right_child, model.sigma, model.sigma_m)
    +
    +    @staticmethod
    +    def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation):
    +        return - log_grow_ratio(proposal.updated_node, proposal.existing_node.left_child, proposal.existing_node.right_child, model.sigma, model.sigma_m)
    +
    +    def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation):
    +        prob_prune_selected = - np.log(1)
    +        prob_grow_selected = log_probability_split_within_tree(tree, mutation)
    +
    +        prob_selection_ratio = prob_prune_selected - prob_grow_selected
    +        prune_grow_ratio = np.log(self.prob_method[1] / self.prob_method[0])
    +
    +        return prune_grow_ratio + prob_selection_ratio
    +
    +    def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation):
    +        prob_selection_ratio = log_probability_split_within_node(GrowMutation(mutation.updated_node, mutation.existing_node))
    +        grow_prune_ratio = np.log(self.prob_method[0] / self.prob_method[1])
    +
    +        return grow_prune_ratio + prob_selection_ratio
    +
    +    @staticmethod
    +    def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation):
    +        denominator = log_probability_node_not_split(model, proposal.existing_node)
    +
    +        prob_left_not_split = log_probability_node_not_split(model, proposal.updated_node.left_child)
    +        prob_right_not_split = log_probability_node_not_split(model, proposal.updated_node.right_child)
    +        prob_updated_node_split = log_probability_node_split(model, proposal.updated_node)
    +        prob_chosen_split = log_probability_split_within_tree(tree, proposal)
    +        numerator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
    +
    +        return numerator - denominator
    +
    +    @staticmethod
    +    def log_tree_ratio_prune(model: Model, proposal: PruneMutation):
    +        numerator = log_probability_node_not_split(model, proposal.updated_node)
    +
    +        prob_left_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
    +        prob_right_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
    +        prob_updated_node_split = log_probability_node_split(model, proposal.existing_node)
    +        prob_chosen_split = log_probability_split_within_node(GrowMutation(proposal.updated_node, proposal.existing_node))
    +        denominator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
    +
    +        return numerator - denominator
    +
    +
    +def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) -> float:
    +    """
    +    The log probability of the particular grow mutation being selected conditional on growing a given tree
    +    i.e.
    +    log(P(mutation | node)P(node| tree)
    +
    +    """
    +    prob_split_chosen = log_probability_split_within_node(mutation)
    +    return prob_split_chosen
    +
    +
    +def log_probability_split_within_node(mutation: GrowMutation) -> float:
    +    """
    +    The log probability of the particular grow mutation being selected conditional on growing a given node
    +
    +    i.e.
    +    log(P(splitting_value | splitting_variable, node, grow) * P(splitting_variable | node, grow))
    +    """
    +    splitting_variable = mutation.updated_node.most_recent_split_condition().splitting_variable
    +    splitting_value = mutation.updated_node.most_recent_split_condition().splitting_value
    +    prob_value_selected_within_variable = np.log(mutation.existing_node.data.X.proportion_of_value_in_variable(splitting_variable, splitting_value))
    +    return prob_value_selected_within_variable
    +
    +
    +def log_probability_node_split(model: Model, node: TreeNode):
    +    return np.log(model.alpha * np.power(1 + node.depth, -model.beta))
    +
    +
    +def log_probability_node_not_split(model: Model, node: TreeNode):
    +    return np.log(1. - model.alpha * np.power(1 + node.depth, -model.beta))
    +
    +
    +
    +
    +
    +
    +
    +

    Functions

    +
    +
    +def log_grow_ratio(combined_node: LeafNode, left_node: LeafNode, right_node: LeafNode, sigma: Sigma, sigma_mu: float) +
    +
    +
    +
    + +Expand source code + +
    def log_grow_ratio(combined_node: LeafNode, left_node: LeafNode, right_node: LeafNode, sigma: Sigma, sigma_mu: float):
    +    var = np.power(sigma.current_value(), 2)
    +    var_mu = np.power(sigma_mu, 2)
    +    n = combined_node.data.X.n_obsv
    +    n_l = left_node.data.X.n_obsv
    +    n_r = right_node.data.X.n_obsv
    +
    +    first_term = (var * (var + n * sigma_mu)) / ((var + n_l * var_mu) * (var + n_r * var_mu))
    +    first_term = np.log(np.sqrt(first_term))
    +
    +    left_resp_contribution = np.square(left_node.data.y.summed_y()) / (var + n_l * sigma_mu)
    +    right_resp_contribution = np.square(right_node.data.y.summed_y()) / (var + n_r * sigma_mu)
    +    combined_resp_contribution = np.square(combined_node.data.y.summed_y()) / (var + n * sigma_mu)
    +
    +    resp_contribution = left_resp_contribution + right_resp_contribution - combined_resp_contribution
    +
    +    return first_term + ((var_mu / (2 * var)) * resp_contribution)
    +
    +
    +
    +def log_probability_node_not_split(model: Model, node: TreeNode) +
    +
    +
    +
    + +Expand source code + +
    def log_probability_node_not_split(model: Model, node: TreeNode):
    +    return np.log(1. - model.alpha * np.power(1 + node.depth, -model.beta))
    +
    +
    +
    +def log_probability_node_split(model: Model, node: TreeNode) +
    +
    +
    +
    + +Expand source code + +
    def log_probability_node_split(model: Model, node: TreeNode):
    +    return np.log(model.alpha * np.power(1 + node.depth, -model.beta))
    +
    +
    +
    +def log_probability_split_within_node(mutation: GrowMutation) ‑> float +
    +
    +

    The log probability of the particular grow mutation being selected conditional on growing a given node

    +

    i.e. +log(P(splitting_value | splitting_variable, node, grow) * P(splitting_variable | node, grow))

    +
    + +Expand source code + +
    def log_probability_split_within_node(mutation: GrowMutation) -> float:
    +    """
    +    The log probability of the particular grow mutation being selected conditional on growing a given node
    +
    +    i.e.
    +    log(P(splitting_value | splitting_variable, node, grow) * P(splitting_variable | node, grow))
    +    """
    +    splitting_variable = mutation.updated_node.most_recent_split_condition().splitting_variable
    +    splitting_value = mutation.updated_node.most_recent_split_condition().splitting_value
    +    prob_value_selected_within_variable = np.log(mutation.existing_node.data.X.proportion_of_value_in_variable(splitting_variable, splitting_value))
    +    return prob_value_selected_within_variable
    +
    +
    +
    +def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) ‑> float +
    +
    +

    The log probability of the particular grow mutation being selected conditional on growing a given tree +i.e. +log(P(mutation | node)P(node| tree)

    +
    + +Expand source code + +
    def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) -> float:
    +    """
    +    The log probability of the particular grow mutation being selected conditional on growing a given tree
    +    i.e.
    +    log(P(mutation | node)P(node| tree)
    +
    +    """
    +    prob_split_chosen = log_probability_split_within_node(mutation)
    +    return prob_split_chosen
    +
    +
    +
    +
    +
    +

    Classes

    +
    +
    +class UniformTreeMutationLikelihoodRatio +(prob_method: List[float] = None) +
    +
    +

    Responsible for evaluating the ratio of mutations to the reverse movement

    +
    + +Expand source code + +
    class UniformTreeMutationLikelihoodRatio(TreeMutationLikelihoodRatio):
    +    """
    +    Evaluates the log ratios used to assess "grow" and "prune" tree mutations.
    +
    +    Parameters
    +    ----------
    +    prob_method: List[float]
    +        Two-element list used as [p_grow, p_prune]: index 0 is read as the grow
    +        probability and index 1 as the prune probability in the transition ratios.
    +        Defaults to [0.5, 0.5] when None.
    +    """
    +
    +    def __init__(self,
    +                 prob_method: List[float]=None):
    +        # None sentinel avoids sharing one mutable default list between instances
    +        if prob_method is None:
    +            prob_method = [0.5, 0.5]
    +        self.prob_method = prob_method
    +
    +    def log_transition_ratio(self, tree: Tree, mutation: TreeMutation):
    +        """
    +        Dispatch on mutation.kind to the prune or grow transition ratio.
    +
    +        Raises
    +        ------
    +        NotImplementedError
    +            If mutation.kind is neither "prune" nor "grow".
    +        """
    +        if mutation.kind == "prune":
    +            mutation: PruneMutation = mutation
    +            return self.log_prune_transition_ratio(tree, mutation)
    +        if mutation.kind == "grow":
    +            mutation: GrowMutation = mutation
    +            return self.log_grow_transition_ratio(tree, mutation)
    +        else:
    +            raise NotImplementedError("kind {} not supported".format(mutation.kind))
    +
    +    def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
    +        """
    +        Dispatch on mutation.kind to the grow or prune tree (prior) ratio.
    +
    +        Note: for any other kind this falls through and returns None.
    +        """
    +        if mutation.kind == "grow":
    +            mutation: GrowMutation = mutation
    +            return self.log_tree_ratio_grow(model, tree, mutation)
    +        if mutation.kind == "prune":
    +            mutation: PruneMutation = mutation
    +            return self.log_tree_ratio_prune(model, mutation)
    +
    +    def log_likelihood_ratio(self, model: Model, tree: Tree, proposal: TreeMutation):
    +        """
    +        Dispatch on proposal.kind to the grow or prune likelihood ratio.
    +
    +        Returns
    +        -------
    +        The value from the grow/prune helper, or -np.inf when that value is the
    +        numpy masked constant.
    +
    +        Raises
    +        ------
    +        NotImplementedError
    +            If proposal.kind is neither "grow" nor "prune".
    +        """
    +        if proposal.kind == "grow":
    +            proposal: GrowMutation = proposal
    +            log_lik = self.log_likelihood_ratio_grow(model, proposal)
    +        elif proposal.kind == "prune":
    +            proposal: PruneMutation = proposal
    +            log_lik = self.log_likelihood_ratio_prune(model, proposal)
    +        else:
    +            # Without this branch `log_lik` was left unbound and the check below
    +            # failed with UnboundLocalError; raise the same error type that
    +            # log_transition_ratio uses for unsupported kinds.
    +            raise NotImplementedError("Only prune and grow mutations supported")
    +        # A masked result is reported as -inf
    +        if isinstance(log_lik, np.ma.core.MaskedConstant):
    +            return -np.inf
    +        return log_lik
    +
    +    @staticmethod
    +    def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation):
    +        """
    +        log_grow_ratio with the existing node as the combined node and the updated
    +        node's children as the split, using model.sigma and model.sigma_m.
    +        """
    +        return log_grow_ratio(proposal.existing_node, proposal.updated_node.left_child, proposal.updated_node.right_child, model.sigma, model.sigma_m)
    +
    +    @staticmethod
    +    def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation):
    +        """
    +        Negated log_grow_ratio of the reverse move: the updated node is the combined
    +        node and the existing node's children are the split.
    +        """
    +        return - log_grow_ratio(proposal.updated_node, proposal.existing_node.left_child, proposal.existing_node.right_child, model.sigma, model.sigma_m)
    +
    +    def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation):
    +        """
    +        log(p_prune / p_grow) plus (prune selection term - grow selection term).
    +        """
    +        prob_prune_selected = - np.log(1)  # constant 0
    +        prob_grow_selected = log_probability_split_within_tree(tree, mutation)
    +
    +        prob_selection_ratio = prob_prune_selected - prob_grow_selected
    +        prune_grow_ratio = np.log(self.prob_method[1] / self.prob_method[0])
    +
    +        return prune_grow_ratio + prob_selection_ratio
    +
    +    def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation):
    +        """
    +        log(p_grow / p_prune) plus the split-selection term of the reverse grow
    +        (from mutation.updated_node back to mutation.existing_node). `tree` is unused.
    +        """
    +        prob_selection_ratio = log_probability_split_within_node(GrowMutation(mutation.updated_node, mutation.existing_node))
    +        grow_prune_ratio = np.log(self.prob_method[0] / self.prob_method[1])
    +
    +        return grow_prune_ratio + prob_selection_ratio
    +
    +    @staticmethod
    +    def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation):
    +        """
    +        Tree ratio for a grow: (both new children not split + updated node split +
    +        chosen split) minus (existing node not split), all in log space.
    +        """
    +        denominator = log_probability_node_not_split(model, proposal.existing_node)
    +
    +        prob_left_not_split = log_probability_node_not_split(model, proposal.updated_node.left_child)
    +        prob_right_not_split = log_probability_node_not_split(model, proposal.updated_node.right_child)
    +        prob_updated_node_split = log_probability_node_split(model, proposal.updated_node)
    +        prob_chosen_split = log_probability_split_within_tree(tree, proposal)
    +        numerator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
    +
    +        return numerator - denominator
    +
    +    @staticmethod
    +    def log_tree_ratio_prune(model: Model, proposal: PruneMutation):
    +        """
    +        Tree ratio for a prune: (updated node not split) minus (both existing children
    +        not split + existing node split + chosen split), all in log space.
    +        """
    +        numerator = log_probability_node_not_split(model, proposal.updated_node)
    +
    +        prob_left_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
    +        # Fixed: this term previously re-used left_child instead of right_child
    +        prob_right_not_split = log_probability_node_not_split(model, proposal.existing_node.right_child)
    +        prob_updated_node_split = log_probability_node_split(model, proposal.existing_node)
    +        prob_chosen_split = log_probability_split_within_node(GrowMutation(proposal.updated_node, proposal.existing_node))
    +        denominator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
    +
    +        return numerator - denominator
    +
    +

    Ancestors

    + +

    Static methods

    +
    +
    +def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation) +
    +
    +
    +
    + +Expand source code + +
    @staticmethod
    +def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation):
    +    """
    +    log_grow_ratio with the existing node as the combined node and the updated
    +    node's left/right children as the split, using model.sigma and model.sigma_m.
    +    """
    +    return log_grow_ratio(proposal.existing_node, proposal.updated_node.left_child, proposal.updated_node.right_child, model.sigma, model.sigma_m)
    +
    +
    +
    +def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation) +
    +
    +
    +
    + +Expand source code + +
    @staticmethod
    +def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation):
    +    """
    +    Negated log_grow_ratio of the reverse move: the updated node is the combined
    +    node and the existing node's left/right children are the split.
    +    """
    +    return - log_grow_ratio(proposal.updated_node, proposal.existing_node.left_child, proposal.existing_node.right_child, model.sigma, model.sigma_m)
    +
    +
    +
    +def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation) +
    +
    +
    +
    + +Expand source code + +
    @staticmethod
    +def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation):
    +    """
    +    Tree ratio for a grow, in log space: the sum of the helper terms for the grown
    +    tree (both new children not split, updated node split, chosen split) minus the
    +    helper term for the existing node not being split.
    +    """
    +    denominator = log_probability_node_not_split(model, proposal.existing_node)
    +
    +    prob_left_not_split = log_probability_node_not_split(model, proposal.updated_node.left_child)
    +    prob_right_not_split = log_probability_node_not_split(model, proposal.updated_node.right_child)
    +    prob_updated_node_split = log_probability_node_split(model, proposal.updated_node)
    +    prob_chosen_split = log_probability_split_within_tree(tree, proposal)
    +    numerator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
    +
    +    return numerator - denominator
    +
    +
    +
    +def log_tree_ratio_prune(model: Model, proposal: PruneMutation) +
    +
    +
    +
    + +Expand source code + +
    @staticmethod
    +def log_tree_ratio_prune(model: Model, proposal: PruneMutation):
    +    """
    +    Tree ratio for a prune, in log space: the helper term for the updated node not
    +    being split, minus the sum of the helper terms for the existing (unpruned)
    +    subtree (both children not split, existing node split, chosen split).
    +    """
    +    numerator = log_probability_node_not_split(model, proposal.updated_node)
    +
    +    prob_left_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
    +    # Fixed: this term previously re-used left_child instead of right_child
    +    prob_right_not_split = log_probability_node_not_split(model, proposal.existing_node.right_child)
    +    prob_updated_node_split = log_probability_node_split(model, proposal.existing_node)
    +    prob_chosen_split = log_probability_split_within_node(GrowMutation(proposal.updated_node, proposal.existing_node))
    +    denominator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
    +
    +    return numerator - denominator
    +
    +
    +
    +

    Methods

    +
    +
    +def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation) +
    +
    +
    +
    + +Expand source code + +
    def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation):
    +    """
    +    Transition ratio for a grow, in log space:
    +    log(prob_method[1] / prob_method[0]) plus (prune selection term - grow selection term).
    +    """
    +    prob_prune_selected = - np.log(1)  # constant 0
    +    prob_grow_selected = log_probability_split_within_tree(tree, mutation)
    +
    +    prob_selection_ratio = prob_prune_selected - prob_grow_selected
    +    prune_grow_ratio = np.log(self.prob_method[1] / self.prob_method[0])
    +
    +    return prune_grow_ratio + prob_selection_ratio
    +
    +
    +
    +def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation) +
    +
    +
    +
    + +Expand source code + +
    def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation):
    +    """
    +    Transition ratio for a prune, in log space:
    +    log(prob_method[0] / prob_method[1]) plus the split-selection term of the reverse
    +    grow (from mutation.updated_node back to mutation.existing_node).
    +    The `tree` argument is not used in the body.
    +    """
    +    prob_selection_ratio = log_probability_split_within_node(GrowMutation(mutation.updated_node, mutation.existing_node))
    +    grow_prune_ratio = np.log(self.prob_method[0] / self.prob_method[1])
    +
    +    return grow_prune_ratio + prob_selection_ratio
    +
    +
    +
    +

    Inherited members

    + +
    +
    +
    +
    + +
    +
    +
    + + + + + + \ No newline at end of file diff --git a/docs/experimental/bartpy/samplers/oblivioustrees/treemutation.html b/docs/experimental/bartpy/samplers/oblivioustrees/treemutation.html index b999cb69..b6f44bd9 100644 --- a/docs/experimental/bartpy/samplers/oblivioustrees/treemutation.html +++ b/docs/experimental/bartpy/samplers/oblivioustrees/treemutation.html @@ -29,9 +29,9 @@ from ...mutation import TreeMutation from ...samplers.sampler import Sampler from ...samplers.scalar import UniformScalarSampler -from ...samplers.treemutation import TreeMutationLikihoodRatio +from ...samplers.treemutation import TreeMutationLikelihoodRatio from ...samplers.treemutation import TreeMutationProposer -from ...samplers.oblivioustrees.likihoodratio import UniformTreeMutationLikihoodRatio +from ...samplers.oblivioustrees.likelihoodratio import UniformTreeMutationLikelihoodRatio from ...samplers.oblivioustrees.proposer import UniformMutationProposer from ...tree import Tree, mutate @@ -41,28 +41,28 @@ A sampler for tree mutation space. 
Responsible for producing samples of ways to mutate a tree within a model - Works by combining a proposer and likihood evaluator into: + Works by combining a proposer and likelihood evaluator into: - propose a mutation - - assess likihood - - accept if likihood higher than a uniform(0, 1) draw + - assess likelihood + - accept if likelihood higher than a uniform(0, 1) draw Parameters ---------- proposer: TreeMutationProposer - likihood_ratio: TreeMutationLikihoodRatio + likelihood_ratio: TreeMutationLikelihoodRatio """ def __init__(self, proposer: TreeMutationProposer, - likihood_ratio: TreeMutationLikihoodRatio, + likelihood_ratio: TreeMutationLikelihoodRatio, scalar_sampler=UniformScalarSampler()): self.proposer = proposer - self.likihood_ratio = likihood_ratio + self.likelihood_ratio = likelihood_ratio self._scalar_sampler = scalar_sampler def sample(self, model: Model, tree: Tree) -> Optional[List[TreeMutation]]: proposals: List[TreeMutation] = self.proposer.propose(tree) - ratio = np.sum([self.likihood_ratio.log_probability_ratio(model, tree, x) for x in proposals]) + ratio = np.sum([self.likelihood_ratio.log_probability_ratio(model, tree, x) for x in proposals]) if self._scalar_sampler.sample() < ratio: return proposals else: @@ -79,8 +79,8 @@ def get_tree_sampler(p_grow: float, p_prune: float): proposer = UniformMutationProposer(p_grow, p_prune) - likihood = UniformTreeMutationLikihoodRatio([p_grow, p_prune]) - return UnconstrainedTreeMutationSampler(proposer, likihood)
    + likelihood = UniformTreeMutationLikelihoodRatio([p_grow, p_prune]) + return UnconstrainedTreeMutationSampler(proposer, likelihood)
  • @@ -102,8 +102,8 @@

    Functions

    def get_tree_sampler(p_grow: float,
                          p_prune: float):
         proposer = UniformMutationProposer(p_grow, p_prune)
    -    likihood = UniformTreeMutationLikihoodRatio([p_grow, p_prune])
    -    return UnconstrainedTreeMutationSampler(proposer, likihood)
    + likelihood = UniformTreeMutationLikelihoodRatio([p_grow, p_prune]) + return UnconstrainedTreeMutationSampler(proposer, likelihood)
    @@ -113,20 +113,20 @@

    Classes

    class UnconstrainedTreeMutationSampler -(proposer: TreeMutationProposer, likihood_ratio: TreeMutationLikihoodRatio, scalar_sampler=<imodels.experimental.bartpy.samplers.scalar.UniformScalarSampler object>) +(proposer: TreeMutationProposer, likelihood_ratio: TreeMutationLikelihoodRatio, scalar_sampler=<imodels.experimental.bartpy.samplers.scalar.UniformScalarSampler object>)

    A sampler for tree mutation space. Responsible for producing samples of ways to mutate a tree within a model

    -

    Works by combining a proposer and likihood evaluator into: +

    Works by combining a proposer and likelihood evaluator into: - propose a mutation -- assess likihood -- accept if likihood higher than a uniform(0, 1) draw

    +- assess likelihood +- accept if likelihood higher than a uniform(0, 1) draw

    Parameters

    proposer : TreeMutationProposer
     
    -
    likihood_ratio : TreeMutationLikihoodRatio
    +
    likelihood_ratio : TreeMutationLikelihoodRatio
     
    @@ -138,28 +138,28 @@

    Parameters

    A sampler for tree mutation space. Responsible for producing samples of ways to mutate a tree within a model - Works by combining a proposer and likihood evaluator into: + Works by combining a proposer and likelihood evaluator into: - propose a mutation - - assess likihood - - accept if likihood higher than a uniform(0, 1) draw + - assess likelihood + - accept if likelihood higher than a uniform(0, 1) draw Parameters ---------- proposer: TreeMutationProposer - likihood_ratio: TreeMutationLikihoodRatio + likelihood_ratio: TreeMutationLikelihoodRatio """ def __init__(self, proposer: TreeMutationProposer, - likihood_ratio: TreeMutationLikihoodRatio, + likelihood_ratio: TreeMutationLikelihoodRatio, scalar_sampler=UniformScalarSampler()): self.proposer = proposer - self.likihood_ratio = likihood_ratio + self.likelihood_ratio = likelihood_ratio self._scalar_sampler = scalar_sampler def sample(self, model: Model, tree: Tree) -> Optional[List[TreeMutation]]: proposals: List[TreeMutation] = self.proposer.propose(tree) - ratio = np.sum([self.likihood_ratio.log_probability_ratio(model, tree, x) for x in proposals]) + ratio = np.sum([self.likelihood_ratio.log_probability_ratio(model, tree, x) for x in proposals]) if self._scalar_sampler.sample() < ratio: return proposals else: @@ -190,7 +190,7 @@

    Methods

    def sample(self, model: Model, tree: Tree) -> Optional[List[TreeMutation]]:
         proposals: List[TreeMutation] = self.proposer.propose(tree)
    -    ratio = np.sum([self.likihood_ratio.log_probability_ratio(model, tree, x) for x in proposals])
    +    ratio = np.sum([self.likelihood_ratio.log_probability_ratio(model, tree, x) for x in proposals])
         if self._scalar_sampler.sample() < ratio:
             return proposals
         else:
    diff --git a/docs/experimental/bartpy/samplers/treemutation.html b/docs/experimental/bartpy/samplers/treemutation.html
    index 68ac4741..36c0e056 100644
    --- a/docs/experimental/bartpy/samplers/treemutation.html
    +++ b/docs/experimental/bartpy/samplers/treemutation.html
    @@ -37,10 +37,10 @@
         A sampler for tree mutation space.
         Responsible for producing samples of ways to mutate a tree within a model
     
    -    A general schema of implementation is to combine a proposer and likihood evaluator to:
    +    A general schema of implementation is to combine a proposer and likelihood evaluator to:
          - propose a mutation
    -     - assess likihood
    -     - accept if likihood higher than a uniform(0, 1) draw
    +     - assess likelihood
    +     - accept if likelihood higher than a uniform(0, 1) draw
         """
     
         def sample(self, model: Model, tree: Tree) -> Optional[TreeMutation]:
    @@ -74,14 +74,14 @@
             raise NotImplementedError()
     
     
    -class TreeMutationLikihoodRatio(ABC):
    +class TreeMutationLikelihoodRatio(ABC):
         """
         Responsible for evaluating the ratio of mutations to the reverse movement
         """
     
         def log_probability_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> Tuple[float, tuple, tuple]:
             """
    -        Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement
    +        Calculated the ratio of the likelihood of a mutation over the likelihood of the reverse movement
     
             Main access point for the class
     
    @@ -99,7 +99,7 @@
             float
                 logged ratio of likelihoods
             """
    -        log_likelihood_ratio, (l_new, l_old) = self.log_likihood_ratio(model, tree, mutation)
    +        log_likelihood_ratio, (l_new, l_old) = self.log_likelihood_ratio(model, tree, mutation)
             log_transition_ratio, (t_new, t_old) = self.log_transition_ratio(tree, mutation)
             log_prior_ratio, (p_new, p_old) = self.log_tree_ratio(model, tree, mutation)
             bayes_term = log_transition_ratio + log_prior_ratio
    @@ -114,9 +114,9 @@
         @abstractmethod
         def log_transition_ratio(self, tree: Tree, mutation: TreeMutation) -> float:
             """
    -        The logged ratio of the likihood of making the transition to the likihood of making the reverse transition.
    +        The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition.
             e.g. in the case of using only grow and prune mutations:
    -            log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree)
    +            log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree)
     
             Parameters
             ----------
    @@ -128,14 +128,14 @@
             Returns
             -------
             float
    -            logged likihood ratio
    +            logged likelihood ratio
             """
             raise NotImplementedError()
     
         @abstractmethod
         def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> float:
             """
    -        Logged ratio of the likihood of the tree before and after the mutation
    +        Logged ratio of the likelihood of the tree before and after the mutation
             i.e. the product of the probability of all split nodes being split and all leaf node note being split
     
             Parameters
    @@ -150,15 +150,15 @@
             Returns
             -------
             float
    -            logged likihood ratio
    +            logged likelihood ratio
             """
     
             raise NotImplementedError()
     
         @abstractmethod
    -    def log_likihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
    +    def log_likelihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
             """
    -        The logged ratio of the likihood of all the data points before and after the mutation
    +        The logged ratio of the likelihood of all the data points before and after the mutation
             Generally more complex trees should be able to fit the data better than simple trees
     
             Parameters
    @@ -173,7 +173,7 @@
             Returns
             -------
             float
    -            logged likihood ratio
    +            logged likelihood ratio
             """
             raise NotImplementedError()
    @@ -187,8 +187,8 @@

    Classes

    -
    -class TreeMutationLikihoodRatio +
    +class TreeMutationLikelihoodRatio

    Responsible for evaluating the ratio of mutations to the reverse movement

    @@ -196,14 +196,14 @@

    Classes

    Expand source code -
    class TreeMutationLikihoodRatio(ABC):
    +
    class TreeMutationLikelihoodRatio(ABC):
         """
         Responsible for evaluating the ratio of mutations to the reverse movement
         """
     
         def log_probability_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> Tuple[float, tuple, tuple]:
             """
    -        Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement
    +        Calculated the ratio of the likelihood of a mutation over the likelihood of the reverse movement
     
             Main access point for the class
     
    @@ -221,7 +221,7 @@ 

    Classes

    float logged ratio of likelihoods """ - log_likelihood_ratio, (l_new, l_old) = self.log_likihood_ratio(model, tree, mutation) + log_likelihood_ratio, (l_new, l_old) = self.log_likelihood_ratio(model, tree, mutation) log_transition_ratio, (t_new, t_old) = self.log_transition_ratio(tree, mutation) log_prior_ratio, (p_new, p_old) = self.log_tree_ratio(model, tree, mutation) bayes_term = log_transition_ratio + log_prior_ratio @@ -236,9 +236,9 @@

    Classes

    @abstractmethod def log_transition_ratio(self, tree: Tree, mutation: TreeMutation) -> float: """ - The logged ratio of the likihood of making the transition to the likihood of making the reverse transition. + The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition. e.g. in the case of using only grow and prune mutations: - log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree) + log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree) Parameters ---------- @@ -250,14 +250,14 @@

    Classes

    Returns ------- float - logged likihood ratio + logged likelihood ratio """ raise NotImplementedError() @abstractmethod def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> float: """ - Logged ratio of the likihood of the tree before and after the mutation + Logged ratio of the likelihood of the tree before and after the mutation i.e. the product of the probability of all split nodes being split and all leaf node note being split Parameters @@ -272,15 +272,15 @@

    Classes

    Returns ------- float - logged likihood ratio + logged likelihood ratio """ raise NotImplementedError() @abstractmethod - def log_likihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation): + def log_likelihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation): """ - The logged ratio of the likihood of all the data points before and after the mutation + The logged ratio of the likelihood of all the data points before and after the mutation Generally more complex trees should be able to fit the data better than simple trees Parameters @@ -295,7 +295,7 @@

    Classes

    Returns ------- float - logged likihood ratio + logged likelihood ratio """ raise NotImplementedError()
    @@ -305,16 +305,16 @@

    Ancestors

    Subclasses

    Methods

    -
    -def log_likihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) +
    +def log_likelihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation)
    -

    The logged ratio of the likihood of all the data points before and after the mutation +

    The logged ratio of the likelihood of all the data points before and after the mutation Generally more complex trees should be able to fit the data better than simple trees

    Parameters

    @@ -328,16 +328,16 @@

    Parameters

    Returns

    float
    -
    logged likihood ratio
    +
    logged likelihood ratio
    Expand source code
    @abstractmethod
    -def log_likihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
    +def log_likelihood_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
         """
    -    The logged ratio of the likihood of all the data points before and after the mutation
    +    The logged ratio of the likelihood of all the data points before and after the mutation
         Generally more complex trees should be able to fit the data better than simple trees
     
         Parameters
    @@ -352,16 +352,16 @@ 

    Returns

    Returns ------- float - logged likihood ratio + logged likelihood ratio """ raise NotImplementedError()
    -
    +
    def log_probability_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) ‑> Tuple[float, tuple, tuple]
    -

    Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement

    +

    Calculated the ratio of the likelihood of a mutation over the likelihood of the reverse movement

    Main access point for the class

    Parameters

    @@ -383,7 +383,7 @@

    Returns

    def log_probability_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> Tuple[float, tuple, tuple]:
         """
    -    Calculated the ratio of the likihood of a mutation over the likihood of the reverse movement
    +    Calculated the ratio of the likelihood of a mutation over the likelihood of the reverse movement
     
         Main access point for the class
     
    @@ -401,7 +401,7 @@ 

    Returns

    float logged ratio of likelihoods """ - log_likelihood_ratio, (l_new, l_old) = self.log_likihood_ratio(model, tree, mutation) + log_likelihood_ratio, (l_new, l_old) = self.log_likelihood_ratio(model, tree, mutation) log_transition_ratio, (t_new, t_old) = self.log_transition_ratio(tree, mutation) log_prior_ratio, (p_new, p_old) = self.log_tree_ratio(model, tree, mutation) bayes_term = log_transition_ratio + log_prior_ratio @@ -414,13 +414,13 @@

    Returns

    return ratio, (l_new, l_old), (prob_new, prob_old)
    -
    +
    def log_transition_ratio(self, tree: Tree, mutation: TreeMutation) ‑> float
    -

    The logged ratio of the likihood of making the transition to the likihood of making the reverse transition. +

    The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition. e.g. in the case of using only grow and prune mutations: -log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree)

    +log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree)

    Parameters

    tree : Tree
    @@ -431,7 +431,7 @@

    Parameters

    Returns

    float
    -
    logged likihood ratio
    +
    logged likelihood ratio
    @@ -440,9 +440,9 @@

    Returns

    @abstractmethod
     def log_transition_ratio(self, tree: Tree, mutation: TreeMutation) -> float:
         """
    -    The logged ratio of the likihood of making the transition to the likihood of making the reverse transition.
    +    The logged ratio of the likelihood of making the transition to the likelihood of making the reverse transition.
         e.g. in the case of using only grow and prune mutations:
    -        log(likihood of growing from tree to the post mutation tree / likihood of pruning from the post mutation tree to the tree)
    +        log(likelihood of growing from tree to the post mutation tree / likelihood of pruning from the post mutation tree to the tree)
     
         Parameters
         ----------
    @@ -454,16 +454,16 @@ 

    Returns

    Returns ------- float - logged likihood ratio + logged likelihood ratio """ raise NotImplementedError()
    -
    +
    def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) ‑> float
    -

    Logged ratio of the likihood of the tree before and after the mutation +

    Logged ratio of the likelihood of the tree before and after the mutation i.e. the product of the probability of all split nodes being split and all leaf node note being split

    Parameters

    @@ -477,7 +477,7 @@

    Parameters

    Returns

    float
    -
    logged likihood ratio
    +
    logged likelihood ratio
    @@ -486,7 +486,7 @@

    Returns

    @abstractmethod
     def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation) -> float:
         """
    -    Logged ratio of the likihood of the tree before and after the mutation
    +    Logged ratio of the likelihood of the tree before and after the mutation
         i.e. the product of the probability of all split nodes being split and all leaf node note being split
     
         Parameters
    @@ -501,7 +501,7 @@ 

    Returns

    Returns ------- float - logged likihood ratio + logged likelihood ratio """ raise NotImplementedError()
    @@ -598,10 +598,10 @@

    Returns

    A sampler for tree mutation space. Responsible for producing samples of ways to mutate a tree within a model

    -

    A general schema of implementation is to combine a proposer and likihood evaluator to: +

    A general schema of implementation is to combine a proposer and likelihood evaluator to: - propose a mutation -- assess likihood -- accept if likihood higher than a uniform(0, 1) draw

    +- assess likelihood +- accept if likelihood higher than a uniform(0, 1) draw

    Expand source code @@ -611,10 +611,10 @@

    Returns

    A sampler for tree mutation space. Responsible for producing samples of ways to mutate a tree within a model - A general schema of implementation is to combine a proposer and likihood evaluator to: + A general schema of implementation is to combine a proposer and likelihood evaluator to: - propose a mutation - - assess likihood - - accept if likihood higher than a uniform(0, 1) draw + - assess likelihood + - accept if likelihood higher than a uniform(0, 1) draw """ def sample(self, model: Model, tree: Tree) -> Optional[TreeMutation]: @@ -675,12 +675,12 @@

    Index 🔍

  • Classes

    • -

      TreeMutationLikihoodRatio

      +

      TreeMutationLikelihoodRatio

    • diff --git a/docs/experimental/bartpy/samplers/unconstrainedtree/index.html b/docs/experimental/bartpy/samplers/unconstrainedtree/index.html index ad416acc..a9afb792 100644 --- a/docs/experimental/bartpy/samplers/unconstrainedtree/index.html +++ b/docs/experimental/bartpy/samplers/unconstrainedtree/index.html @@ -21,7 +21,7 @@

      Sub-modules

      -
      imodels.experimental.bartpy.samplers.unconstrainedtree.likihoodratio
      +
      imodels.experimental.bartpy.samplers.unconstrainedtree.likelihoodratio
      @@ -61,7 +61,7 @@

      Index 🔍

    • Sub-modules

      diff --git a/docs/experimental/bartpy/samplers/unconstrainedtree/likelihoodratio.html b/docs/experimental/bartpy/samplers/unconstrainedtree/likelihoodratio.html new file mode 100644 index 00000000..08331465 --- /dev/null +++ b/docs/experimental/bartpy/samplers/unconstrainedtree/likelihoodratio.html @@ -0,0 +1,721 @@ + + + + + + + + + + + + + + + + +
      +
      +
      +
      + +Expand source code + +
      from typing import List
      +
      +import numpy as np
      +
      +from ...model import Model
      +from ...mutation import TreeMutation, GrowMutation, PruneMutation
      +from ...node import LeafNode, TreeNode
      +from ...samplers.treemutation import TreeMutationLikelihoodRatio
      +from ...sigma import Sigma
      +from ...tree import Tree
      +
      +
      +def log_grow_ratio(combined_node: LeafNode, left_node: LeafNode, right_node: LeafNode, sigma: Sigma, sigma_mu: float):
      +    """
      +    Compute the two log terms compared when a leaf is split into two children.
      +
      +    Parameters
      +    ----------
      +    combined_node: LeafNode
      +        The unsplit leaf; supplies n (data.X.n_obsv) and the summed response (data.y.summed_y()).
      +    left_node, right_node: LeafNode
      +        The two child leaves; the same two quantities are read from each.
      +    sigma: Sigma
      +        sigma.current_value() is squared to give `var`.
      +    sigma_mu: float
      +        Squared to give `var_mu`.
      +
      +    Returns
      +    -------
      +    tuple
      +        (big_model_l, small_model_l): the term for the split (two-leaf) model and the term
      +        for the unsplit (single-leaf) model. Unlike the commented-out formula below, the
      +        two are returned separately rather than combined into one ratio.
      +    """
      +    # derivation: https://www.cs.ubc.ca/~murphyk/Papers/bayesGauss.pdf
      +    var = np.power(sigma.current_value(), 2)
      +    var_mu = np.power(sigma_mu, 2)
      +    n = combined_node.data.X.n_obsv
      +    n_l = left_node.data.X.n_obsv
      +    n_r = right_node.data.X.n_obsv
      +
      +    # first_term = (var * (var + n * sigma_mu)) / ((var + n_l * var_mu) * (var + n_r * var_mu))
      +    # first_term = np.log(np.sqrt(first_term))
      +
      +    combined_y_sum = combined_node.data.y.summed_y()
      +    left_y_sum = left_node.data.y.summed_y()
      +    right_y_sum = right_node.data.y.summed_y()
      +
      +    # NOTE(review): these three denominators use sigma_mu, while numerator_first and
      +    # denominator_first below use var_mu (sigma_mu ** 2) — confirm which is intended.
      +    left_resp_contribution = np.square(left_y_sum) / (var + n_l * sigma_mu)
      +    right_resp_contribution = np.square(right_y_sum) / (var + n_r * sigma_mu)
      +    combined_resp_contribution = np.square(combined_y_sum) / (var + n * sigma_mu)
      +
      +    # resp_contribution = left_resp_contribution + right_resp_contribution - combined_resp_contribution
      +
      +    # Split model: log-sqrt term over both children plus the scaled response contributions
      +    numerator_first = np.log(np.sqrt(((var + n_l * var_mu) * (var + n_r * var_mu))))
      +    numerator_second = (var_mu / (2 * var)) * (left_resp_contribution + right_resp_contribution)
      +
      +    big_model_l = numerator_first + numerator_second
      +
      +    # Unsplit model: same two pieces computed for the combined node
      +    denominator_first = np.log(np.sqrt((var * (var + n * var_mu))))
      +    denominator_second = (var_mu / (2 * var)) * combined_resp_contribution
      +
      +    small_model_l = denominator_first + denominator_second
      +
      +    return big_model_l, small_model_l
      +
      +    # return first_term + ((var_mu / (2 * var)) * resp_contribution)
      +
      +
      +class UniformTreeMutationLikelihoodRatio(TreeMutationLikelihoodRatio):
      +
      +    def __init__(self,
      +                 prob_method: List[float] = None):
      +        if prob_method is None:
      +            prob_method = [0.5, 0.5]
      +        self.prob_method = prob_method
      +
      +    def log_transition_ratio(self, tree: Tree, mutation: TreeMutation):
      +        if mutation.kind == "prune":
      +            mutation: PruneMutation = mutation
      +            return self.log_prune_transition_ratio(tree, mutation)
      +        if mutation.kind == "grow":
      +            mutation: GrowMutation = mutation
      +            return self.log_grow_transition_ratio(tree, mutation)
      +        else:
      +            raise NotImplementedError("kind {} not supported".format(mutation.kind))
      +
      +    def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
      +        if mutation.kind == "grow":
      +            mutation: GrowMutation = mutation
      +            return self.log_tree_ratio_grow(model, tree, mutation)
      +        if mutation.kind == "prune":
      +            mutation: PruneMutation = mutation
      +            return self.log_tree_ratio_prune(model, mutation)
      +
      +    def log_likelihood_ratio(self, model: Model, tree: Tree, proposal: TreeMutation):
      +        if proposal.kind == "grow":
      +            proposal: GrowMutation = proposal
      +            return self.log_likelihood_ratio_grow(model, proposal)
      +        if proposal.kind == "prune":
      +            proposal: PruneMutation = proposal
      +            return self.log_likelihood_ratio_prune(model, proposal)
      +        else:
      +            raise NotImplementedError("Only prune and grow mutations supported")
      +
      +    @staticmethod
      +    def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation):
      +        new_model_l, old_model_l = log_grow_ratio(proposal.existing_node, proposal.updated_node.left_child,
      +                                                  proposal.updated_node.right_child, model.sigma, model.sigma_m)
      +        return (new_model_l - old_model_l), (new_model_l, old_model_l)
      +
      +    @staticmethod
      +    def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation):
      +        old_model_l, new_model_l = log_grow_ratio(proposal.updated_node, proposal.existing_node.left_child,
      +                                                  proposal.existing_node.right_child, model.sigma, model.sigma_m)
      +        return (new_model_l - old_model_l), (new_model_l, old_model_l)
      +
      +    def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation):
      +        prob_prune_selected = - np.log(n_prunable_decision_nodes(tree) + 1)
      +        prob_grow_selected = log_probability_split_within_tree(tree, mutation)
      +
      +        prob_selection_ratio = prob_prune_selected - prob_grow_selected
      +        prune_grow_ratio = np.log(self.prob_method[1] / self.prob_method[0])
      +
      +        numerator = prob_prune_selected
      +        denominator = prob_grow_selected
      +
      +        # return prune_grow_ratio + prob_selection_ratio
      +        return numerator - denominator, (numerator, denominator)
      +
      +    def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation):
      +        if n_splittable_leaf_nodes(tree) == 1:
      +            prob_grow_node_selected = - np.inf  # Infinitely unlikely to be able to prune a null tree
      +        else:
      +            prob_grow_node_selected = - np.log(n_splittable_leaf_nodes(tree) - 1)
      +        prob_split = log_probability_split_within_node(GrowMutation(mutation.updated_node, mutation.existing_node))
      +        prob_grow_selected = prob_grow_node_selected + prob_split
      +
      +        prob_prune_selected = - np.log(n_prunable_decision_nodes(tree))
      +
      +        prob_selection_ratio = prob_grow_selected - prob_prune_selected
      +        grow_prune_ratio = np.log(self.prob_method[0] / self.prob_method[1])
      +
      +        numerator = prob_grow_selected
      +        denominator = prob_prune_selected
      +
      +        # return grow_prune_ratio + prob_selection_ratio
      +
      +        return numerator - denominator, (numerator, denominator)
      +
      +    @staticmethod
      +    def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation):
      +        denominator = log_probability_node_not_split(model, proposal.existing_node)
      +
      +        prob_left_not_split = log_probability_node_not_split(model, proposal.updated_node.left_child)
      +        prob_right_not_split = log_probability_node_not_split(model, proposal.updated_node.right_child)
      +        prob_updated_node_split = log_probability_node_split(model, proposal.updated_node)
      +        prob_chosen_split = log_probability_split_within_tree(tree, proposal)
      +        numerator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
      +
      +        return numerator - denominator, (numerator, denominator)
      +
      +    @staticmethod
      +    def log_tree_ratio_prune(model: Model, proposal: PruneMutation):
      +        numerator = log_probability_node_not_split(model, proposal.updated_node)
      +
      +        prob_left_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
      +        prob_right_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
      +        prob_updated_node_split = log_probability_node_split(model, proposal.existing_node)
      +        prob_chosen_split = log_probability_split_within_node(
      +            GrowMutation(proposal.updated_node, proposal.existing_node))
      +        denominator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
      +
      +        return numerator - denominator, (numerator, denominator)
      +
      +
      +def n_prunable_decision_nodes(tree: Tree) -> int:
      +    """
      +    The number of prunable decision nodes
      +    i.e. how many decision nodes have two leaf children
      +    """
      +    return len(tree.prunable_decision_nodes)
      +
      +
      +def n_splittable_leaf_nodes(tree: Tree) -> int:
      +    """
      +    The number of splittable leaf nodes
      +    i.e. how many leaf nodes have more than one distinct values in their covariate matrix
      +    """
      +    return len(tree.splittable_leaf_nodes)
      +
      +
      +def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) -> float:
      +    """
      +    The log probability of the particular grow mutation being selected conditional on growing a given tree
      +    i.e.
      +    log(P(mutation | node)P(node| tree)
      +
      +    """
      +    prob_node_chosen_to_split_on = - np.log(n_splittable_leaf_nodes(tree))
      +    prob_split_chosen = log_probability_split_within_node(mutation)
      +    return prob_node_chosen_to_split_on + prob_split_chosen
      +
      +
      +def log_probability_split_within_node(mutation: GrowMutation) -> float:
      +    """
      +    The log probability of the particular grow mutation being selected conditional on growing a given node
      +
      +    i.e.
      +    log(P(splitting_value | splitting_variable, node, grow) * P(splitting_variable | node, grow))
      +    """
      +
      +    prob_splitting_variable_selected = - np.log(mutation.existing_node.data.X.n_splittable_variables)
      +    splitting_variable = mutation.updated_node.most_recent_split_condition().splitting_variable
      +    splitting_value = mutation.updated_node.most_recent_split_condition().splitting_value
      +    prob_value_selected_within_variable = np.log(
      +        mutation.existing_node.data.X.proportion_of_value_in_variable(splitting_variable, splitting_value))
      +    return prob_splitting_variable_selected + prob_value_selected_within_variable
      +
      +
      +def log_probability_node_split(model: Model, node: TreeNode):
      +    return np.log(model.alpha * np.power(1 + node.depth, -model.beta))
      +
      +
      +def log_probability_node_not_split(model: Model, node: TreeNode):
      +    return np.log(1. - model.alpha * np.power(1 + node.depth, -model.beta))
      +
      +
      +
      +
      +
      +
      +
      +

      Functions

      +
      +
      +def log_grow_ratio(combined_node: LeafNode, left_node: LeafNode, right_node: LeafNode, sigma: Sigma, sigma_mu: float) +
      +
      +
      +
      + +Expand source code + +
      def log_grow_ratio(combined_node: LeafNode, left_node: LeafNode, right_node: LeafNode, sigma: Sigma, sigma_mu: float):
      +    # deviation: https: // www.cs.ubc.ca / ~murphyk / Papers / bayesGauss.pdf
      +    var = np.power(sigma.current_value(), 2)
      +    var_mu = np.power(sigma_mu, 2)
      +    n = combined_node.data.X.n_obsv
      +    n_l = left_node.data.X.n_obsv
      +    n_r = right_node.data.X.n_obsv
      +
      +    # first_term = (var * (var + n * sigma_mu)) / ((var + n_l * var_mu) * (var + n_r * var_mu))
      +    # first_term = np.log(np.sqrt(first_term))
      +
      +    combined_y_sum = combined_node.data.y.summed_y()
      +    left_y_sum = left_node.data.y.summed_y()
      +    right_y_sum = right_node.data.y.summed_y()
      +
      +    left_resp_contribution = np.square(left_y_sum) / (var + n_l * sigma_mu)
      +    right_resp_contribution = np.square(right_y_sum) / (var + n_r * sigma_mu)
      +    combined_resp_contribution = np.square(combined_y_sum) / (var + n * sigma_mu)
      +
      +    # resp_contribution = left_resp_contribution + right_resp_contribution - combined_resp_contribution
      +
      +    numerator_first = np.log(np.sqrt(((var + n_l * var_mu) * (var + n_r * var_mu))))
      +    numerator_second = (var_mu / (2 * var)) * (left_resp_contribution + right_resp_contribution)
      +
      +    big_model_l = numerator_first + numerator_second
      +
      +    denominator_first = np.log(np.sqrt((var * (var + n * var_mu))))
      +    denominator_second = (var_mu / (2 * var)) * combined_resp_contribution
      +
      +    small_model_l = denominator_first + denominator_second
      +
      +    return big_model_l, small_model_l
      +
      +    # return first_term + ((var_mu / (2 * var)) * resp_contribution)
      +
      +
      +
      +def log_probability_node_not_split(model: Model, node: TreeNode) +
      +
      +
      +
      + +Expand source code + +
      def log_probability_node_not_split(model: Model, node: TreeNode):
      +    return np.log(1. - model.alpha * np.power(1 + node.depth, -model.beta))
      +
      +
      +
      +def log_probability_node_split(model: Model, node: TreeNode) +
      +
      +
      +
      + +Expand source code + +
      def log_probability_node_split(model: Model, node: TreeNode):
      +    return np.log(model.alpha * np.power(1 + node.depth, -model.beta))
      +
      +
      +
      +def log_probability_split_within_node(mutation: GrowMutation) ‑> float +
      +
      +

      The log probability of the particular grow mutation being selected conditional on growing a given node

      +

      i.e. +log(P(splitting_value | splitting_variable, node, grow) * P(splitting_variable | node, grow))

      +
      + +Expand source code + +
      def log_probability_split_within_node(mutation: GrowMutation) -> float:
      +    """
      +    The log probability of the particular grow mutation being selected conditional on growing a given node
      +
      +    i.e.
      +    log(P(splitting_value | splitting_variable, node, grow) * P(splitting_variable | node, grow))
      +    """
      +
      +    prob_splitting_variable_selected = - np.log(mutation.existing_node.data.X.n_splittable_variables)
      +    splitting_variable = mutation.updated_node.most_recent_split_condition().splitting_variable
      +    splitting_value = mutation.updated_node.most_recent_split_condition().splitting_value
      +    prob_value_selected_within_variable = np.log(
      +        mutation.existing_node.data.X.proportion_of_value_in_variable(splitting_variable, splitting_value))
      +    return prob_splitting_variable_selected + prob_value_selected_within_variable
      +
      +
      +
      +def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) ‑> float +
      +
      +

      The log probability of the particular grow mutation being selected conditional on growing a given tree +i.e. +log(P(mutation | node) * P(node | tree))

      +
      + +Expand source code + +
      def log_probability_split_within_tree(tree: Tree, mutation: GrowMutation) -> float:
      +    """
      +    The log probability of the particular grow mutation being selected conditional on growing a given tree
      +    i.e.
      +    log(P(mutation | node)P(node| tree)
      +
      +    """
      +    prob_node_chosen_to_split_on = - np.log(n_splittable_leaf_nodes(tree))
      +    prob_split_chosen = log_probability_split_within_node(mutation)
      +    return prob_node_chosen_to_split_on + prob_split_chosen
      +
      +
      +
      +def n_prunable_decision_nodes(tree: Tree) ‑> int +
      +
      +

      The number of prunable decision nodes +i.e. how many decision nodes have two leaf children

      +
      + +Expand source code + +
      def n_prunable_decision_nodes(tree: Tree) -> int:
      +    """
      +    The number of prunable decision nodes
      +    i.e. how many decision nodes have two leaf children
      +    """
      +    return len(tree.prunable_decision_nodes)
      +
      +
      +
      +def n_splittable_leaf_nodes(tree: Tree) ‑> int +
      +
      +

      The number of splittable leaf nodes +i.e. how many leaf nodes have more than one distinct value in their covariate matrix

      +
      + +Expand source code + +
      def n_splittable_leaf_nodes(tree: Tree) -> int:
      +    """
      +    The number of splittable leaf nodes
      +    i.e. how many leaf nodes have more than one distinct values in their covariate matrix
      +    """
      +    return len(tree.splittable_leaf_nodes)
      +
      +
      +
      +
      +
      +

      Classes

      +
      +
      +class UniformTreeMutationLikelihoodRatio +(prob_method: List[float] = None) +
      +
      +

      Responsible for evaluating the ratio of mutations to the reverse movement

      +
      + +Expand source code + +
      class UniformTreeMutationLikelihoodRatio(TreeMutationLikelihoodRatio):
      +
      +    def __init__(self,
      +                 prob_method: List[float] = None):
      +        if prob_method is None:
      +            prob_method = [0.5, 0.5]
      +        self.prob_method = prob_method
      +
      +    def log_transition_ratio(self, tree: Tree, mutation: TreeMutation):
      +        if mutation.kind == "prune":
      +            mutation: PruneMutation = mutation
      +            return self.log_prune_transition_ratio(tree, mutation)
      +        if mutation.kind == "grow":
      +            mutation: GrowMutation = mutation
      +            return self.log_grow_transition_ratio(tree, mutation)
      +        else:
      +            raise NotImplementedError("kind {} not supported".format(mutation.kind))
      +
      +    def log_tree_ratio(self, model: Model, tree: Tree, mutation: TreeMutation):
      +        if mutation.kind == "grow":
      +            mutation: GrowMutation = mutation
      +            return self.log_tree_ratio_grow(model, tree, mutation)
      +        if mutation.kind == "prune":
      +            mutation: PruneMutation = mutation
      +            return self.log_tree_ratio_prune(model, mutation)
      +
      +    def log_likelihood_ratio(self, model: Model, tree: Tree, proposal: TreeMutation):
      +        if proposal.kind == "grow":
      +            proposal: GrowMutation = proposal
      +            return self.log_likelihood_ratio_grow(model, proposal)
      +        if proposal.kind == "prune":
      +            proposal: PruneMutation = proposal
      +            return self.log_likelihood_ratio_prune(model, proposal)
      +        else:
      +            raise NotImplementedError("Only prune and grow mutations supported")
      +
      +    @staticmethod
      +    def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation):
      +        new_model_l, old_model_l = log_grow_ratio(proposal.existing_node, proposal.updated_node.left_child,
      +                                                  proposal.updated_node.right_child, model.sigma, model.sigma_m)
      +        return (new_model_l - old_model_l), (new_model_l, old_model_l)
      +
      +    @staticmethod
      +    def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation):
      +        old_model_l, new_model_l = log_grow_ratio(proposal.updated_node, proposal.existing_node.left_child,
      +                                                  proposal.existing_node.right_child, model.sigma, model.sigma_m)
      +        return (new_model_l - old_model_l), (new_model_l, old_model_l)
      +
      +    def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation):
      +        prob_prune_selected = - np.log(n_prunable_decision_nodes(tree) + 1)
      +        prob_grow_selected = log_probability_split_within_tree(tree, mutation)
      +
      +        prob_selection_ratio = prob_prune_selected - prob_grow_selected
      +        prune_grow_ratio = np.log(self.prob_method[1] / self.prob_method[0])
      +
      +        numerator = prob_prune_selected
      +        denominator = prob_grow_selected
      +
      +        # return prune_grow_ratio + prob_selection_ratio
      +        return numerator - denominator, (numerator, denominator)
      +
      +    def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation):
      +        if n_splittable_leaf_nodes(tree) == 1:
      +            prob_grow_node_selected = - np.inf  # Infinitely unlikely to be able to prune a null tree
      +        else:
      +            prob_grow_node_selected = - np.log(n_splittable_leaf_nodes(tree) - 1)
      +        prob_split = log_probability_split_within_node(GrowMutation(mutation.updated_node, mutation.existing_node))
      +        prob_grow_selected = prob_grow_node_selected + prob_split
      +
      +        prob_prune_selected = - np.log(n_prunable_decision_nodes(tree))
      +
      +        prob_selection_ratio = prob_grow_selected - prob_prune_selected
      +        grow_prune_ratio = np.log(self.prob_method[0] / self.prob_method[1])
      +
      +        numerator = prob_grow_selected
      +        denominator = prob_prune_selected
      +
      +        # return grow_prune_ratio + prob_selection_ratio
      +
      +        return numerator - denominator, (numerator, denominator)
      +
      +    @staticmethod
      +    def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation):
      +        denominator = log_probability_node_not_split(model, proposal.existing_node)
      +
      +        prob_left_not_split = log_probability_node_not_split(model, proposal.updated_node.left_child)
      +        prob_right_not_split = log_probability_node_not_split(model, proposal.updated_node.right_child)
      +        prob_updated_node_split = log_probability_node_split(model, proposal.updated_node)
      +        prob_chosen_split = log_probability_split_within_tree(tree, proposal)
      +        numerator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
      +
      +        return numerator - denominator, (numerator, denominator)
      +
      +    @staticmethod
      +    def log_tree_ratio_prune(model: Model, proposal: PruneMutation):
      +        numerator = log_probability_node_not_split(model, proposal.updated_node)
      +
      +        prob_left_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
      +        prob_right_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
      +        prob_updated_node_split = log_probability_node_split(model, proposal.existing_node)
      +        prob_chosen_split = log_probability_split_within_node(
      +            GrowMutation(proposal.updated_node, proposal.existing_node))
      +        denominator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
      +
      +        return numerator - denominator, (numerator, denominator)
      +
      +

      Ancestors

      + +

      Static methods

      +
      +
      +def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation) +
      +
      +
      +
      + +Expand source code + +
      @staticmethod
      +def log_likelihood_ratio_grow(model: Model, proposal: TreeMutation):
      +    new_model_l, old_model_l = log_grow_ratio(proposal.existing_node, proposal.updated_node.left_child,
      +                                              proposal.updated_node.right_child, model.sigma, model.sigma_m)
      +    return (new_model_l - old_model_l), (new_model_l, old_model_l)
      +
      +
      +
      +def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation) +
      +
      +
      +
      + +Expand source code + +
      @staticmethod
      +def log_likelihood_ratio_prune(model: Model, proposal: TreeMutation):
      +    old_model_l, new_model_l = log_grow_ratio(proposal.updated_node, proposal.existing_node.left_child,
      +                                              proposal.existing_node.right_child, model.sigma, model.sigma_m)
      +    return (new_model_l - old_model_l), (new_model_l, old_model_l)
      +
      +
      +
      +def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation) +
      +
      +
      +
      + +Expand source code + +
      @staticmethod
      +def log_tree_ratio_grow(model: Model, tree: Tree, proposal: GrowMutation):
      +    denominator = log_probability_node_not_split(model, proposal.existing_node)
      +
      +    prob_left_not_split = log_probability_node_not_split(model, proposal.updated_node.left_child)
      +    prob_right_not_split = log_probability_node_not_split(model, proposal.updated_node.right_child)
      +    prob_updated_node_split = log_probability_node_split(model, proposal.updated_node)
      +    prob_chosen_split = log_probability_split_within_tree(tree, proposal)
      +    numerator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
      +
      +    return numerator - denominator, (numerator, denominator)
      +
      +
      +
      +def log_tree_ratio_prune(model: Model, proposal: PruneMutation) +
      +
      +
      +
      + +Expand source code + +
      @staticmethod
      +def log_tree_ratio_prune(model: Model, proposal: PruneMutation):
      +    numerator = log_probability_node_not_split(model, proposal.updated_node)
      +
      +    prob_left_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
      +    prob_right_not_split = log_probability_node_not_split(model, proposal.existing_node.left_child)
      +    prob_updated_node_split = log_probability_node_split(model, proposal.existing_node)
      +    prob_chosen_split = log_probability_split_within_node(
      +        GrowMutation(proposal.updated_node, proposal.existing_node))
      +    denominator = prob_left_not_split + prob_right_not_split + prob_updated_node_split + prob_chosen_split
      +
      +    return numerator - denominator, (numerator, denominator)
      +
      +
      +
      +

      Methods

      +
      +
      +def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation) +
      +
      +
      +
      + +Expand source code + +
      def log_grow_transition_ratio(self, tree: Tree, mutation: GrowMutation):
      +    prob_prune_selected = - np.log(n_prunable_decision_nodes(tree) + 1)
      +    prob_grow_selected = log_probability_split_within_tree(tree, mutation)
      +
      +    prob_selection_ratio = prob_prune_selected - prob_grow_selected
      +    prune_grow_ratio = np.log(self.prob_method[1] / self.prob_method[0])
      +
      +    numerator = prob_prune_selected
      +    denominator = prob_grow_selected
      +
      +    # return prune_grow_ratio + prob_selection_ratio
      +    return numerator - denominator, (numerator, denominator)
      +
      +
      +
      +def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation) +
      +
      +
      +
      + +Expand source code + +
      def log_prune_transition_ratio(self, tree: Tree, mutation: PruneMutation):
      +    if n_splittable_leaf_nodes(tree) == 1:
      +        prob_grow_node_selected = - np.inf  # Infinitely unlikely to be able to prune a null tree
      +    else:
      +        prob_grow_node_selected = - np.log(n_splittable_leaf_nodes(tree) - 1)
      +    prob_split = log_probability_split_within_node(GrowMutation(mutation.updated_node, mutation.existing_node))
      +    prob_grow_selected = prob_grow_node_selected + prob_split
      +
      +    prob_prune_selected = - np.log(n_prunable_decision_nodes(tree))
      +
      +    prob_selection_ratio = prob_grow_selected - prob_prune_selected
      +    grow_prune_ratio = np.log(self.prob_method[0] / self.prob_method[1])
      +
      +    numerator = prob_grow_selected
      +    denominator = prob_prune_selected
      +
      +    # return grow_prune_ratio + prob_selection_ratio
      +
      +    return numerator - denominator, (numerator, denominator)
      +
      +
      +
      +

      Inherited members

      + +
      +
      +
      +
      + +
      +
      +
      + + + + + + \ No newline at end of file diff --git a/docs/experimental/bartpy/samplers/unconstrainedtree/treemutation.html b/docs/experimental/bartpy/samplers/unconstrainedtree/treemutation.html index 345bc2ca..be0c6c93 100644 --- a/docs/experimental/bartpy/samplers/unconstrainedtree/treemutation.html +++ b/docs/experimental/bartpy/samplers/unconstrainedtree/treemutation.html @@ -29,9 +29,9 @@ from ...mutation import TreeMutation from ...samplers.sampler import Sampler from ...samplers.scalar import UniformScalarSampler -from ...samplers.treemutation import TreeMutationLikihoodRatio +from ...samplers.treemutation import TreeMutationLikelihoodRatio from ...samplers.treemutation import TreeMutationProposer -from ...samplers.unconstrainedtree.likihoodratio import UniformTreeMutationLikihoodRatio +from ...samplers.unconstrainedtree.likelihoodratio import UniformTreeMutationLikelihoodRatio from ...samplers.unconstrainedtree.proposer import UniformMutationProposer from ...tree import Tree, mutate @@ -41,28 +41,28 @@ A sampler for tree mutation space. 
Responsible for producing samples of ways to mutate a tree within a model - Works by combining a proposer and likihood evaluator into: + Works by combining a proposer and likelihood evaluator into: - propose a mutation - - assess likihood - - accept if likihood higher than a uniform(0, 1) draw + - assess likelihood + - accept if likelihood higher than a uniform(0, 1) draw Parameters ---------- proposer: TreeMutationProposer - likihood_ratio: TreeMutationLikihoodRatio + likelihood_ratio: TreeMutationLikelihoodRatio """ def __init__(self, proposer: TreeMutationProposer, - likihood_ratio: TreeMutationLikihoodRatio, + likelihood_ratio: TreeMutationLikelihoodRatio, scalar_sampler=UniformScalarSampler()): self.proposer = proposer - self.likihood_ratio = likihood_ratio + self.likelihood_ratio = likelihood_ratio self._scalar_sampler = scalar_sampler def sample(self, model: Model, tree: Tree) -> (Optional[TreeMutation], float): proposal = self.proposer.propose(tree) - ratio, (l_new, l_old), (prob_new, prob_old) = self.likihood_ratio.log_probability_ratio(model, tree, proposal) + ratio, (l_new, l_old), (prob_new, prob_old) = self.likelihood_ratio.log_probability_ratio(model, tree, proposal) if self._scalar_sampler.sample() < ratio: return proposal, np.exp(l_new) - np.exp(l_old), np.exp(prob_new) - np.exp(prob_old) else: @@ -78,8 +78,8 @@ def get_tree_sampler(p_grow: float, p_prune: float) -> Sampler: proposer = UniformMutationProposer([p_grow, p_prune]) - likihood = UniformTreeMutationLikihoodRatio([p_grow, p_prune]) - return UnconstrainedTreeMutationSampler(proposer, likihood)
  • + likelihood = UniformTreeMutationLikelihoodRatio([p_grow, p_prune]) + return UnconstrainedTreeMutationSampler(proposer, likelihood)
    @@ -101,8 +101,8 @@

    Functions

    def get_tree_sampler(p_grow: float,
                          p_prune: float) -> Sampler:
         proposer = UniformMutationProposer([p_grow, p_prune])
    -    likihood = UniformTreeMutationLikihoodRatio([p_grow, p_prune])
    -    return UnconstrainedTreeMutationSampler(proposer, likihood)
    + likelihood = UniformTreeMutationLikelihoodRatio([p_grow, p_prune]) + return UnconstrainedTreeMutationSampler(proposer, likelihood)
    @@ -112,20 +112,20 @@

    Classes

    class UnconstrainedTreeMutationSampler -(proposer: TreeMutationProposer, likihood_ratio: TreeMutationLikihoodRatio, scalar_sampler=<imodels.experimental.bartpy.samplers.scalar.UniformScalarSampler object>) +(proposer: TreeMutationProposer, likelihood_ratio: TreeMutationLikelihoodRatio, scalar_sampler=<imodels.experimental.bartpy.samplers.scalar.UniformScalarSampler object>)

    A sampler for tree mutation space. Responsible for producing samples of ways to mutate a tree within a model

    -

    Works by combining a proposer and likihood evaluator into: +

    Works by combining a proposer and likelihood evaluator into: - propose a mutation -- assess likihood -- accept if likihood higher than a uniform(0, 1) draw

    +- assess likelihood +- accept if likelihood higher than a uniform(0, 1) draw

    Parameters

    proposer : TreeMutationProposer
     
    -
    likihood_ratio : TreeMutationLikihoodRatio
    +
    likelihood_ratio : TreeMutationLikelihoodRatio
     
    @@ -137,28 +137,28 @@

    Parameters

    A sampler for tree mutation space. Responsible for producing samples of ways to mutate a tree within a model - Works by combining a proposer and likihood evaluator into: + Works by combining a proposer and likelihood evaluator into: - propose a mutation - - assess likihood - - accept if likihood higher than a uniform(0, 1) draw + - assess likelihood + - accept if likelihood higher than a uniform(0, 1) draw Parameters ---------- proposer: TreeMutationProposer - likihood_ratio: TreeMutationLikihoodRatio + likelihood_ratio: TreeMutationLikelihoodRatio """ def __init__(self, proposer: TreeMutationProposer, - likihood_ratio: TreeMutationLikihoodRatio, + likelihood_ratio: TreeMutationLikelihoodRatio, scalar_sampler=UniformScalarSampler()): self.proposer = proposer - self.likihood_ratio = likihood_ratio + self.likelihood_ratio = likelihood_ratio self._scalar_sampler = scalar_sampler def sample(self, model: Model, tree: Tree) -> (Optional[TreeMutation], float): proposal = self.proposer.propose(tree) - ratio, (l_new, l_old), (prob_new, prob_old) = self.likihood_ratio.log_probability_ratio(model, tree, proposal) + ratio, (l_new, l_old), (prob_new, prob_old) = self.likelihood_ratio.log_probability_ratio(model, tree, proposal) if self._scalar_sampler.sample() < ratio: return proposal, np.exp(l_new) - np.exp(l_old), np.exp(prob_new) - np.exp(prob_old) else: @@ -188,7 +188,7 @@

    Methods

    def sample(self, model: Model, tree: Tree) -> (Optional[TreeMutation], float):
         proposal = self.proposer.propose(tree)
    -    ratio, (l_new, l_old), (prob_new, prob_old) = self.likihood_ratio.log_probability_ratio(model, tree, proposal)
    +    ratio, (l_new, l_old), (prob_new, prob_old) = self.likelihood_ratio.log_probability_ratio(model, tree, proposal)
         if self._scalar_sampler.sample() < ratio:
             return proposal, np.exp(l_new) - np.exp(l_old), np.exp(prob_new) - np.exp(prob_old)
         else:
    diff --git a/docs/experimental/bartpy/sklearnmodel.html b/docs/experimental/bartpy/sklearnmodel.html
    index 94af2849..fe9cf5d3 100644
    --- a/docs/experimental/bartpy/sklearnmodel.html
    +++ b/docs/experimental/bartpy/sklearnmodel.html
    @@ -464,7 +464,7 @@
                 [mean_squared_error(self.data.y.unnormalize_y(preds), y) for preds in predictions_transformed])
             return predictions_std
     
    -    def chain_precitions(self, X, chain_number):
    +    def chain_predictions(self, X, chain_number):
             predictions_transformed = self._chain_pred_arr(X, chain_number)
             preds_arr = [self.data.y.unnormalize_y(preds) for preds in predictions_transformed]
             return preds_arr
    @@ -1925,7 +1925,7 @@ 

    Parameters

    [mean_squared_error(self.data.y.unnormalize_y(preds), y) for preds in predictions_transformed]) return predictions_std - def chain_precitions(self, X, chain_number): + def chain_predictions(self, X, chain_number): predictions_transformed = self._chain_pred_arr(X, chain_number) preds_arr = [self.data.y.unnormalize_y(preds) for preds in predictions_transformed] return preds_arr @@ -2284,8 +2284,8 @@

    Methods

    return predictions_std
    -
    -def chain_precitions(self, X, chain_number) +
    +def chain_predictions(self, X, chain_number)
    @@ -2293,7 +2293,7 @@

    Methods

    Expand source code -
    def chain_precitions(self, X, chain_number):
    +
    def chain_predictions(self, X, chain_number):
         predictions_transformed = self._chain_pred_arr(X, chain_number)
         preds_arr = [self.data.y.unnormalize_y(preds) for preds in predictions_transformed]
         return preds_arr
    @@ -2768,7 +2768,7 @@

    acceptance_trace
  • between_chains_var
  • chain_mse_std
  • -
  • chain_precitions
  • +
  • chain_predictions
  • complexity_
  • f_chains
  • f_delayed_chains
  • diff --git a/docs/experimental/figs_ensembles.html b/docs/experimental/figs_ensembles.html index 05cf5001..82772944 100644 --- a/docs/experimental/figs_ensembles.html +++ b/docs/experimental/figs_ensembles.html @@ -153,7 +153,7 @@ def _init_decision_function(self): """Sets decision function based on prediction_task """ - # used by sklearn GrriidSearchCV, BaggingClassifier + # used by sklearn GridSearchCV, BaggingClassifier if self.prediction_task == 'classification': decision_function = lambda x: self.predict_proba(x)[:, 1] elif self.prediction_task == 'regression': @@ -632,7 +632,7 @@

    Classes

    def _init_decision_function(self): """Sets decision function based on prediction_task """ - # used by sklearn GrriidSearchCV, BaggingClassifier + # used by sklearn GridSearchCV, BaggingClassifier if self.prediction_task == 'classification': decision_function = lambda x: self.predict_proba(x)[:, 1] elif self.prediction_task == 'regression': diff --git a/docs/rule_list/bayesian_rule_list/brl_util.html b/docs/rule_list/bayesian_rule_list/brl_util.html index 80f699cd..32dd4506 100644 --- a/docs/rule_list/bayesian_rule_list/brl_util.html +++ b/docs/rule_list/bayesian_rule_list/brl_util.html @@ -62,7 +62,7 @@ # dictionary whose keys are a string Pickle-dump of the antecedent list d, and # whose values are a list [a,b] where a is (proportional to) the log posterior of # d, and b is the number of times d is present in the MCMC samples. -# - d_star - the BRL-point antecedent list. A list of indicies corresponding to +# - d_star - the BRL-point antecedent list. A list of indices corresponding to # variable "itemsets." # - itemsets - A list of itemsets. 
itemsets[d_star[i]] is the antecedent in # position i on the BRL-point list @@ -248,7 +248,7 @@ minrulesize = int(floor(avgrulesize)) maxrulesize = int(ceil(avgrulesize)) # Run through all perms again - likelihds = [] + likelihoods = [] d_ts = [] beta_Z, logalpha_pmf, logbeta_pmf = prior_calculations(lbda, len(X), eta, maxlhs) # get the constants needed to compute the prior @@ -267,11 +267,11 @@ # Compute the likelihood R_t = d_t.index(0) N_t = compute_rule_usage(d_t, R_t, X, Y) - likelihds.append( + likelihoods.append( fn_logposterior(d_t, R_t, N_t, alpha, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len)) - likelihds = array(likelihds) - d_star = d_ts[likelihds.argmax()] + likelihoods = array(likelihoods) + d_star = d_ts[likelihoods.argmax()] except RuntimeWarning: # This can happen if all perms are identically [0], or if no soln is found within the len and width bounds (probably the chains didn't converge) print('No suitable point estimate found') @@ -450,7 +450,7 @@ move_probs = array(move_probs_default) Jratios = array([1., move_probs[2] / float(move_probs[1]), move_probs[1] / float(move_probs[2])]) u = random.random() - # First we will find the indicies for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location + # First we will find the indices for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location if u < sum(move_probs[:1]): # This is an on-list move. 
step = 'move' @@ -509,18 +509,18 @@ def fn_logposterior(d_t, R_t, N_t, alpha, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len): '''# Compute log posterior ''' - logliklihood = fn_logliklihood(d_t, N_t, R_t, alpha) + loglikelihood = fn_loglikelihood(d_t, N_t, R_t, alpha) logprior = fn_logprior(d_t, R_t, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len) - return logliklihood + logprior + return loglikelihood + logprior -def fn_logliklihood(d_t, N_t, R_t, alpha): +def fn_loglikelihood(d_t, N_t, R_t, alpha): '''Compute log likelihood ''' gammaln_Nt_jk = gammaln(N_t + alpha) gammaln_Nt_j = gammaln(sum(N_t + alpha, 1)) - logliklihood = sum(gammaln_Nt_jk) - sum(gammaln_Nt_j) - return logliklihood + loglikelihood = sum(gammaln_Nt_jk) - sum(gammaln_Nt_j) + return loglikelihood def fn_logprior(d_t, R_t, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len): @@ -684,8 +684,8 @@

    Functions

    return [0., 0.]

    -
    -def fn_logliklihood(d_t, N_t, R_t, alpha) +
    +def fn_loglikelihood(d_t, N_t, R_t, alpha)

    Compute log likelihood

    @@ -693,13 +693,13 @@

    Functions

    Expand source code -
    def fn_logliklihood(d_t, N_t, R_t, alpha):
    +
    def fn_loglikelihood(d_t, N_t, R_t, alpha):
         '''Compute log likelihood
         '''
         gammaln_Nt_jk = gammaln(N_t + alpha)
         gammaln_Nt_j = gammaln(sum(N_t + alpha, 1))
    -    logliklihood = sum(gammaln_Nt_jk) - sum(gammaln_Nt_j)
    -    return logliklihood
    + loglikelihood = sum(gammaln_Nt_jk) - sum(gammaln_Nt_j) + return loglikelihood
    @@ -714,9 +714,9 @@

    Functions

    def fn_logposterior(d_t, R_t, N_t, alpha, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len):
         '''# Compute log posterior
         '''
    -    logliklihood = fn_logliklihood(d_t, N_t, R_t, alpha)
    +    loglikelihood = fn_loglikelihood(d_t, N_t, R_t, alpha)
         logprior = fn_logprior(d_t, R_t, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len)
    -    return logliklihood + logprior
    + return loglikelihood + logprior
    @@ -859,7 +859,7 @@

    Functions

    minrulesize = int(floor(avgrulesize)) maxrulesize = int(ceil(avgrulesize)) # Run through all perms again - likelihds = [] + likelihoods = [] d_ts = [] beta_Z, logalpha_pmf, logbeta_pmf = prior_calculations(lbda, len(X), eta, maxlhs) # get the constants needed to compute the prior @@ -878,11 +878,11 @@

    Functions

    # Compute the likelihood R_t = d_t.index(0) N_t = compute_rule_usage(d_t, R_t, X, Y) - likelihds.append( + likelihoods.append( fn_logposterior(d_t, R_t, N_t, alpha, logalpha_pmf, logbeta_pmf, maxlhs, beta_Z, nruleslen, lhs_len)) - likelihds = array(likelihds) - d_star = d_ts[likelihds.argmax()] + likelihoods = array(likelihoods) + d_star = d_ts[likelihoods.argmax()] except RuntimeWarning: # This can happen if all perms are identically [0], or if no soln is found within the len and width bounds (probably the chains didn't converge) print('No suitable point estimate found') @@ -1130,7 +1130,7 @@

    Functions

    move_probs = array(move_probs_default) Jratios = array([1., move_probs[2] / float(move_probs[1]), move_probs[1] / float(move_probs[2])]) u = random.random() - # First we will find the indicies for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location + # First we will find the indices for the insertion-deletion. indx1 is the item to be moved, indx2 is the new location if u < sum(move_probs[:1]): # This is an on-list move. step = 'move' @@ -1246,7 +1246,7 @@

    Index 🔍

  • bayesdl_mcmc
  • compute_rule_usage
  • default_permsdic
  • -
  • fn_logliklihood
  • +
  • fn_loglikelihood
  • fn_logposterior
  • fn_logprior
  • gelmanrubin
  • diff --git a/docs/rule_set/brs.html b/docs/rule_set/brs.html index cb0e1b1d..a3aebffd 100644 --- a/docs/rule_set/brs.html +++ b/docs/rule_set/brs.html @@ -256,9 +256,9 @@ self.pattern_space[k] = self.pattern_space[k] + tmp def _generate_rules(self, X, y, verbose): - '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy - there are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generates rules much faster than randomforest. - If maxlen is big, fpgrowh tends to generate too many rules that overflow the memories. + '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy. + There are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generates rules much faster than randomforest. + If maxlen is big, fpgrowth tends to generate too many rules that overflow the memory. ''' df = 1 - X # df has negative associations @@ -845,9 +845,9 @@

    Params

    self.pattern_space[k] = self.pattern_space[k] + tmp def _generate_rules(self, X, y, verbose): - '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy - there are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generates rules much faster than randomforest. - If maxlen is big, fpgrowh tends to generate too many rules that overflow the memories. + '''This function generates rules that satisfy supp and maxlen using fpgrowth, then it selects the top n_rules rules that make data have the biggest decrease in entropy. + There are two ways to generate rules. fpgrowth can handle cases where the maxlen is small. If maxlen<=3, fpgrowth can generates rules much faster than randomforest. + If maxlen is big, fpgrowth tends to generate too many rules that overflow the memory. ''' df = 1 - X # df has negative associations diff --git a/docs/rule_set/slipper_util.html b/docs/rule_set/slipper_util.html index 8744ffb0..41b5cc27 100644 --- a/docs/rule_set/slipper_util.html +++ b/docs/rule_set/slipper_util.html @@ -60,7 +60,7 @@ def _condition_classify(self, X, condition): """ - Helper funciton to make classificaitons for a condition + Helper function to make classifications for a condition in a rule """ @@ -88,7 +88,7 @@ return preds def _get_design_matrices(self, X, y, rule): - """ produce design matrices used in most equaitons""" + """ produce design matrices used in most equations""" preds = self._rule_predict(X, rule) W_plus_idx = np.where((preds == 1) & (y == 1)) @@ -113,7 +113,7 @@ def _grow_rule(self, X, y): """ Starts with empty conjunction of conditions and - greedily adds rules to mazimize Z_tilda + greedily adds rules to maximize Z_tilde """ stop_condition = False @@ -137,11 +137,11 @@ for A_c in pivots ] - # get max Z_tilda and update candidate accordingly - tildas = [self._grow_rule_obj(X, y, r) 
for r in feature_candidates] - if max(tildas) > self._grow_rule_obj(X, y, candidate_rule): + # get max Z_tilde and update candidate accordingly + tildes = [self._grow_rule_obj(X, y, r) for r in feature_candidates] + if max(tildes) > self._grow_rule_obj(X, y, candidate_rule): candidate_rule = feature_candidates[ - tildas.index(max(tildas)) + tildes.index(max(tildes)) ] preds = self._rule_predict(X, candidate_rule) @@ -249,7 +249,7 @@ def _set_rule_or_default(self, X, y, learned_rule): """ Compare output of eq 5 between learned rule and default rule - return rule that minmizes eq 5 + return rule that minimizes eq 5 """ rules = [self._make_default_rule(X, y), learned_rule] @@ -356,7 +356,7 @@

    Classes

    def _condition_classify(self, X, condition): """ - Helper funciton to make classificaitons for a condition + Helper function to make classifications for a condition in a rule """ @@ -384,7 +384,7 @@

    Classes

    return preds def _get_design_matrices(self, X, y, rule): - """ produce design matrices used in most equaitons""" + """ produce design matrices used in most equations""" preds = self._rule_predict(X, rule) W_plus_idx = np.where((preds == 1) & (y == 1)) @@ -409,7 +409,7 @@

    Classes

    def _grow_rule(self, X, y): """ Starts with empty conjunction of conditions and - greedily adds rules to mazimize Z_tilda + greedily adds rules to maximize Z_tilde """ stop_condition = False @@ -433,11 +433,11 @@

    Classes

    for A_c in pivots ] - # get max Z_tilda and update candidate accordingly - tildas = [self._grow_rule_obj(X, y, r) for r in feature_candidates] - if max(tildas) > self._grow_rule_obj(X, y, candidate_rule): + # get max Z_tilde and update candidate accordingly + tildes = [self._grow_rule_obj(X, y, r) for r in feature_candidates] + if max(tildes) > self._grow_rule_obj(X, y, candidate_rule): candidate_rule = feature_candidates[ - tildas.index(max(tildas)) + tildes.index(max(tildes)) ] preds = self._rule_predict(X, candidate_rule) @@ -545,7 +545,7 @@

    Classes

    def _set_rule_or_default(self, X, y, learned_rule): """ Compare output of eq 5 between learned rule and default rule - return rule that minmizes eq 5 + return rule that minimizes eq 5 """ rules = [self._make_default_rule(X, y), learned_rule] diff --git a/docs/tree/cart_ccp.html b/docs/tree/cart_ccp.html index 73791993..77db6a62 100644 --- a/docs/tree/cart_ccp.html +++ b/docs/tree/cart_ccp.html @@ -819,7 +819,7 @@

    Methods

    https://arxiv.org/abs/2202.00858

    Params

    estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes +Defaults to CART Classification Tree with 20 max leaf nodes Note: this estimator will be directly modified

    reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

    @@ -896,7 +896,7 @@

    Methods

    https://arxiv.org/abs/2202.00858

    Params

    estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes +Defaults to CART Classification Tree with 20 max leaf nodes Note: this estimator will be directly modified

    reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

    diff --git a/docs/tree/gosdt/pygosdt.html b/docs/tree/gosdt/pygosdt.html index 73d608a5..9975695c 100644 --- a/docs/tree/gosdt/pygosdt.html +++ b/docs/tree/gosdt/pygosdt.html @@ -212,7 +212,7 @@ Returns --- - real number : the accuracy produced by applying this model overthe given dataset, with + real number : the accuracy produced by applying this model over the given dataset, with optionals for weighted accuracy """ validation.check_is_fitted(self) @@ -532,7 +532,7 @@

    Classes

    Returns --- - real number : the accuracy produced by applying this model overthe given dataset, with + real number : the accuracy produced by applying this model over the given dataset, with optionals for weighted accuracy """ validation.check_is_fitted(self) @@ -1014,7 +1014,7 @@

    Returns

    Returns

    -
    real number : the accuracy produced by applying this model overthe given dataset, with
    +
    real number : the accuracy produced by applying this model over the given dataset, with
    optionals for weighted accuracy
    @@ -1034,7 +1034,7 @@

    Returns

    Returns --- - real number : the accuracy produced by applying this model overthe given dataset, with + real number : the accuracy produced by applying this model over the given dataset, with optionals for weighted accuracy """ validation.check_is_fitted(self) diff --git a/docs/tree/gosdt/pygosdt_helper.html b/docs/tree/gosdt/pygosdt_helper.html index cca61583..a15e9964 100644 --- a/docs/tree/gosdt/pygosdt_helper.html +++ b/docs/tree/gosdt/pygosdt_helper.html @@ -180,7 +180,7 @@ Returns --- - array-like, shape = [n_sampels by 1] : a column where each element is the prediction + array-like, shape = [n_samples by 1] : a column where each element is the prediction associated with each row """ # Perform an encoding if an encoding unit is specified @@ -235,7 +235,7 @@ Returns --- - real number : the inaccuracy produced by applying this model overthe given dataset, with + real number : the inaccuracy produced by applying this model over the given dataset, with optionals for weighted inaccuracy """ return 1 - self.score(X, y, weight=weight) @@ -253,7 +253,7 @@ Returns --- - real number : the accuracy produced by applying this model overthe given dataset, with + real number : the accuracy produced by applying this model over the given dataset, with optionals for weighted accuracy """ y_hat = self.predict(X) @@ -350,7 +350,7 @@ """ Returns --- - string : pseuodocode representing the logic of this classifier + string : pseudocode representing the logic of this classifier """ cases = [] for group in self.__groups__(): @@ -803,7 +803,7 @@

    Methods

    Returns --- - array-like, shape = [n_sampels by 1] : a column where each element is the prediction + array-like, shape = [n_samples by 1] : a column where each element is the prediction associated with each row """ # Perform an encoding if an encoding unit is specified @@ -858,7 +858,7 @@

    Methods

    Returns --- - real number : the inaccuracy produced by applying this model overthe given dataset, with + real number : the inaccuracy produced by applying this model over the given dataset, with optionals for weighted inaccuracy """ return 1 - self.score(X, y, weight=weight) @@ -876,7 +876,7 @@

    Methods

    Returns --- - real number : the accuracy produced by applying this model overthe given dataset, with + real number : the accuracy produced by applying this model over the given dataset, with optionals for weighted accuracy """ y_hat = self.predict(X) @@ -973,7 +973,7 @@

    Methods

    """ Returns --- - string : pseuodocode representing the logic of this classifier + string : pseudocode representing the logic of this classifier """ cases = [] for group in self.__groups__(): @@ -1251,7 +1251,7 @@

    Returns

    Returns

    -
    real number : the inaccuracy produced by applying this model overthe given dataset, with
    +
    real number : the inaccuracy produced by applying this model over the given dataset, with
    optionals for weighted inaccuracy
    @@ -1271,7 +1271,7 @@

    Returns

    Returns --- - real number : the inaccuracy produced by applying this model overthe given dataset, with + real number : the inaccuracy produced by applying this model over the given dataset, with optionals for weighted inaccuracy """ return 1 - self.score(X, y, weight=weight)
    @@ -1510,7 +1510,7 @@

    Parameters

    Returns

    -
    array-like, shape = [n_sampels by 1] : a column where each element is the prediction
    +
    array-like, shape = [n_samples by 1] : a column where each element is the prediction
    associated with each row
    @@ -1531,7 +1531,7 @@

    Returns

    Returns --- - array-like, shape = [n_sampels by 1] : a column where each element is the prediction + array-like, shape = [n_samples by 1] : a column where each element is the prediction associated with each row """ # Perform an encoding if an encoding unit is specified @@ -1561,7 +1561,7 @@

    Returns

    Returns

    -
    real number : the accuracy produced by applying this model overthe given dataset, with
    +
    real number : the accuracy produced by applying this model over the given dataset, with
    optionals for weighted accuracy
    @@ -1581,7 +1581,7 @@

    Returns

    Returns --- - real number : the accuracy produced by applying this model overthe given dataset, with + real number : the accuracy produced by applying this model over the given dataset, with optionals for weighted accuracy """ y_hat = self.predict(X) diff --git a/docs/tree/hierarchical_shrinkage.html b/docs/tree/hierarchical_shrinkage.html index 02febe0e..0801d286 100644 --- a/docs/tree/hierarchical_shrinkage.html +++ b/docs/tree/hierarchical_shrinkage.html @@ -30,7 +30,8 @@ from sklearn.metrics import r2_score from sklearn.model_selection import cross_val_score from sklearn.model_selection import train_test_split -from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, export_text +from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier, \ + export_text from sklearn.ensemble import GradientBoostingClassifier from imodels.util import checks @@ -50,7 +51,7 @@ Params ------ estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) - Defaults to CART Classification Tree with 20 max leaf ndoes + Defaults to CART Classification Tree with 20 max leaf nodes Note: this estimator will be directly modified reg_param: float @@ -76,11 +77,11 @@ return {'reg_param': self.reg_param, 'estimator_': self.estimator_, 'shrinkage_scheme_': self.shrinkage_scheme_} - def fit(self, X, y, *args, **kwargs): + def fit(self, X, y, sample_weight=None, *args, **kwargs): # remove feature_names if it exists (note: only works as keyword-arg) feature_names = kwargs.pop('feature_names', None) # None returned if not passed X, y, feature_names = check_fit_arguments(self, X, y, feature_names) - self.estimator_ = self.estimator_.fit(X, y, *args, **kwargs) + self.estimator_ = self.estimator_.fit(X, y, sample_weight, *args, **kwargs) self._shrink() # compute complexity @@ -181,6 +182,27 @@ else: return s + export_text(self.estimator_, show_weights=True) + def __repr__(self): + # s = self.__class__.__name__ + # s += "(" + # s += 
"estimator_=" + # s += repr(self.estimator_) + # s += ", " + # s += "reg_param=" + # s += str(self.reg_param) + # s += ", " + # s += "shrinkage_scheme_=" + # s += self.shrinkage_scheme_ + # s += ")" + # return s + attr_list = ["estimator_", "reg_param", "shrinkage_scheme_"] + s = self.__class__.__name__ + s += "(" + for attr in attr_list: + s += attr + "=" + repr(getattr(self, attr)) + ", " + s = s[:-2] + ")" + return s + class HSTreeRegressor(HSTree, RegressorMixin): ... @@ -202,7 +224,7 @@ ------ estimator_ Sklearn estimator (already initialized). - If no estimator_ is passsed, sklearn decision tree is used + If no estimator_ is passed, sklearn decision tree is used max_rules If estimator is None, then max_leaf_nodes is passed to the default decision tree @@ -232,6 +254,16 @@ self.reg_param = self.reg_param_list[np.argmax(self.scores_)] super().fit(X=X, y=y, *args, **kwargs) + def __repr__(self): + attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_", + "cv", "scoring"] + s = self.__class__.__name__ + s += "(" + for attr in attr_list: + s += attr + "=" + repr(getattr(self, attr)) + ", " + s = s[:-2] + ")" + return s + class HSTreeRegressorCV(HSTreeRegressor): def __init__(self, estimator_: BaseEstimator = None, @@ -245,7 +277,7 @@ ------ estimator_ Sklearn estimator (already initialized). - If no estimator_ is passsed, sklearn decision tree is used + If no estimator_ is passed, sklearn decision tree is used max_rules If estimator is None, then max_leaf_nodes is passed to the default decision tree @@ -275,6 +307,16 @@ self.reg_param = self.reg_param_list[np.argmax(self.scores_)] super().fit(X=X, y=y, *args, **kwargs) + def __repr__(self): + attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_", + "cv", "scoring"] + s = self.__class__.__name__ + s += "(" + for attr in attr_list: + s += attr + "=" + repr(getattr(self, attr)) + ", " + s = s[:-2] + ")" + return s + if __name__ == '__main__': np.random.seed(15) @@ -338,7 +380,7 @@

    Classes

    https://arxiv.org/abs/2202.00858

    Params

    estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes +Defaults to CART Classification Tree with 20 max leaf nodes Note: this estimator will be directly modified

    reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

    @@ -363,7 +405,7 @@

    Params

    Params ------ estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) - Defaults to CART Classification Tree with 20 max leaf ndoes + Defaults to CART Classification Tree with 20 max leaf nodes Note: this estimator will be directly modified reg_param: float @@ -389,11 +431,11 @@

    Params

    return {'reg_param': self.reg_param, 'estimator_': self.estimator_, 'shrinkage_scheme_': self.shrinkage_scheme_} - def fit(self, X, y, *args, **kwargs): + def fit(self, X, y, sample_weight=None, *args, **kwargs): # remove feature_names if it exists (note: only works as keyword-arg) feature_names = kwargs.pop('feature_names', None) # None returned if not passed X, y, feature_names = check_fit_arguments(self, X, y, feature_names) - self.estimator_ = self.estimator_.fit(X, y, *args, **kwargs) + self.estimator_ = self.estimator_.fit(X, y, sample_weight, *args, **kwargs) self._shrink() # compute complexity @@ -492,7 +534,28 @@

    Params

    if hasattr(self, 'feature_names') and self.feature_names is not None: return s + export_text(self.estimator_, feature_names=self.feature_names, show_weights=True) else: - return s + export_text(self.estimator_, show_weights=True)
    + return s + export_text(self.estimator_, show_weights=True) + + def __repr__(self): + # s = self.__class__.__name__ + # s += "(" + # s += "estimator_=" + # s += repr(self.estimator_) + # s += ", " + # s += "reg_param=" + # s += str(self.reg_param) + # s += ", " + # s += "shrinkage_scheme_=" + # s += self.shrinkage_scheme_ + # s += ")" + # return s + attr_list = ["estimator_", "reg_param", "shrinkage_scheme_"] + s = self.__class__.__name__ + s += "(" + for attr in attr_list: + s += attr + "=" + repr(getattr(self, attr)) + ", " + s = s[:-2] + ")" + return s

    Subclasses

      @@ -502,7 +565,7 @@

      Subclasses

      Methods

      -def fit(self, X, y, *args, **kwargs) +def fit(self, X, y, sample_weight=None, *args, **kwargs)
      @@ -510,11 +573,11 @@

      Methods

      Expand source code -
      def fit(self, X, y, *args, **kwargs):
      +
      def fit(self, X, y, sample_weight=None, *args, **kwargs):
           # remove feature_names if it exists (note: only works as keyword-arg)
           feature_names = kwargs.pop('feature_names', None)  # None returned if not passed
           X, y, feature_names = check_fit_arguments(self, X, y, feature_names)
      -    self.estimator_ = self.estimator_.fit(X, y, *args, **kwargs)
      +    self.estimator_ = self.estimator_.fit(X, y, sample_weight, *args, **kwargs)
           self._shrink()
       
           # compute complexity
      @@ -608,7 +671,7 @@ 

      Methods

      https://arxiv.org/abs/2202.00858

      Params

      estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes +Defaults to CART Classification Tree with 20 max leaf nodes Note: this estimator will be directly modified

      reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

      @@ -645,7 +708,7 @@

      Subclasses

      Params

      estimator_ Sklearn estimator (already initialized). -If no estimator_ is passsed, sklearn decision tree is used

      +If no estimator_ is passed, sklearn decision tree is used

      max_rules If estimator is None, then max_leaf_nodes is passed to the default decision tree

      args, kwargs @@ -666,7 +729,7 @@

      Params

      ------ estimator_ Sklearn estimator (already initialized). - If no estimator_ is passsed, sklearn decision tree is used + If no estimator_ is passed, sklearn decision tree is used max_rules If estimator is None, then max_leaf_nodes is passed to the default decision tree @@ -694,7 +757,17 @@

      Params

      cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring) self.scores_.append(np.mean(cv_scores)) self.reg_param = self.reg_param_list[np.argmax(self.scores_)] - super().fit(X=X, y=y, *args, **kwargs)
      + super().fit(X=X, y=y, *args, **kwargs) + + def __repr__(self): + attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_", + "cv", "scoring"] + s = self.__class__.__name__ + s += "(" + for attr in attr_list: + s += attr + "=" + repr(getattr(self, attr)) + ", " + s = s[:-2] + ")" + return s

    Ancestors

      @@ -738,7 +811,7 @@

      Methods

      https://arxiv.org/abs/2202.00858

      Params

      estimator_: sklearn tree or tree ensemble model (e.g. RandomForest or GradientBoosting) -Defaults to CART Classification Tree with 20 max leaf ndoes +Defaults to CART Classification Tree with 20 max leaf nodes Note: this estimator will be directly modified

      reg_param: float Higher is more regularization (can be arbitrarily large, should not be < 0)

      @@ -775,7 +848,7 @@

      Subclasses

      Params

      estimator_ Sklearn estimator (already initialized). -If no estimator_ is passsed, sklearn decision tree is used

      +If no estimator_ is passed, sklearn decision tree is used

      max_rules If estimator is None, then max_leaf_nodes is passed to the default decision tree

      args, kwargs @@ -796,7 +869,7 @@

      Params

      ------ estimator_ Sklearn estimator (already initialized). - If no estimator_ is passsed, sklearn decision tree is used + If no estimator_ is passed, sklearn decision tree is used max_rules If estimator is None, then max_leaf_nodes is passed to the default decision tree @@ -824,7 +897,17 @@

      Params

      cv_scores = cross_val_score(est, X, y, cv=self.cv, scoring=self.scoring) self.scores_.append(np.mean(cv_scores)) self.reg_param = self.reg_param_list[np.argmax(self.scores_)] - super().fit(X=X, y=y, *args, **kwargs)
      + super().fit(X=X, y=y, *args, **kwargs) + + def __repr__(self): + attr_list = ["estimator_", "reg_param_list", "shrinkage_scheme_", + "cv", "scoring"] + s = self.__class__.__name__ + s += "(" + for attr in attr_list: + s += attr + "=" + repr(getattr(self, attr)) + ", " + s = s[:-2] + ")" + return s

    Ancestors

      diff --git a/docs/util/neural_nets.html b/docs/util/neural_nets.html index 0b0546fe..3a835b07 100644 --- a/docs/util/neural_nets.html +++ b/docs/util/neural_nets.html @@ -39,7 +39,7 @@

      Example

      dt.fit(X, y) -# pepare net +# prepare net net = Net(dt) @@ -79,7 +79,7 @@

      Example

      dt.fit(X, y) - # pepare net + # prepare net net = Net(dt)