Commit: types on functions

fwick-panasonic committed Aug 20, 2023
1 parent 6bcafad commit 14500ff
Showing 3 changed files with 60 additions and 46 deletions.
14 changes: 8 additions & 6 deletions cyclic_boosting/base.py
@@ -923,7 +923,7 @@ def get_subestimators_as_items(self, prototypes=True) -> List[Tuple]:
return [(feature.feature_id, feature.smoother) for feature in self.features]

@abc.abstractmethod
- def calc_parameters(self, feature: Feature, y: np.ndarray, prediction_link: np.ndarray, prefit_data):
+ def calc_parameters(self, feature: Feature, y: np.ndarray, pred: CBLinkPredictionsFactors, prefit_data):
"""Calculates factors and uncertainties of the bins of a feature group
in the original space (not the link space) and transforms them to the
link space afterwards
@@ -944,8 +944,9 @@ def calc_parameters(self, feature: Feature, y: np.ndarray, prediction_link: np.n
class containing all features
y: np.ndarray
target, truth
- prediction_link: np.ndarray
-     prediction in link space of all *other* features.
+ pred
+     (in-sample) predictions from all other features (excluding the one
+     at hand)
prefit_data
data returned by :meth:`~.precalc_parameters` during fit
@@ -958,7 +959,7 @@ class containing all features
raise NotImplementedError("implement in subclass")

@abc.abstractmethod
- def precalc_parameters(self, feature: Feature, y: np.ndarray, prediction_link: np.ndarray):
+ def precalc_parameters(self, feature: Feature, y: np.ndarray, pred: CBLinkPredictionsFactors):
"""Calculations that are not dependent on intermediate predictions. If
these are not needed, return :obj:`None` in the subclass.
@@ -971,8 +972,9 @@ def precalc_parameters(self, feature: Feature, y: np.ndarray, prediction_link: n
class containing all features
y: np.ndarray
target, truth
- prediction_link: np.ndarray
-     prediction in link space.
+ pred
+     (in-sample) predictions from all other features (excluding the one
+     at hand)
"""
return None

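For orientation, a minimal sketch of a subclass conforming to the new signatures. None of this is code from the commit: the class is hypothetical, and pred.predict_link() is an assumption based on how the other estimators in this repository consume CBLinkPredictionsFactors.

import numpy as np

from cyclic_boosting.base import CyclicBoostingBase, CBLinkPredictionsFactors
from cyclic_boosting.features import Feature


class ToyRegressor(CyclicBoostingBase):  # other abstract members omitted
    def precalc_parameters(self, feature: Feature, y: np.ndarray, pred: CBLinkPredictionsFactors) -> None:
        # nothing to precompute for this toy estimator
        return None

    def calc_parameters(self, feature: Feature, y: np.ndarray, pred: CBLinkPredictionsFactors, prefit_data):
        # predictions of all *other* features now arrive as an aggregate
        # object instead of a raw link-space ndarray
        prediction_link = pred.predict_link()
        ...  # compute per-bin parameters and uncertainties in link space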
90 changes: 51 additions & 39 deletions cyclic_boosting/generic_loss.py
@@ -5,14 +5,18 @@
import warnings

import numpy as np
+ import pandas as pd
import six
import sklearn.base
from scipy.optimize import minimize
from scipy.stats import beta

- from cyclic_boosting.base import CyclicBoostingBase, gaussian_matching_by_quantiles
+ from cyclic_boosting.base import CyclicBoostingBase, gaussian_matching_by_quantiles, Feature, CBLinkPredictionsFactors
from cyclic_boosting.link import LogLinkMixin, IdentityLinkMixin, LogitLinkMixin
from cyclic_boosting.utils import continuous_quantile_from_discrete, get_X_column
+ from cyclic_boosting.classification import get_beta_priors

+ from typing import Tuple, Union

_logger = logging.getLogger(__name__)

@@ -28,10 +32,12 @@ class CBGenericLoss(CyclicBoostingBase):
``CBNBinomRegressor``, or ``CBLocationRegressor``).
"""

- def precalc_parameters(self, feature, y, pred):
+ def precalc_parameters(self, feature: Feature, y: np.ndarray, pred: CBLinkPredictionsFactors) -> None:
pass

- def calc_parameters(self, feature, y, pred, prefit_data):
+ def calc_parameters(
+     self, feature: Feature, y: np.ndarray, pred: CBLinkPredictionsFactors, prefit_data
+ ) -> Tuple[np.ndarray, np.ndarray]:
"""
Calling of the optimization (loss minimization) for the different bins
of the feature at hand. In contrast to the analytical solution in most
@@ -87,7 +93,7 @@ def calc_parameters(self, feature, y, pred, prefit_data):
parameters = np.log(parameters)
return parameters, uncertainties

- def optimization(self, y, yhat_others, weights):
+ def optimization(self, y: np.ndarray, yhat_others: np.ndarray, weights: np.ndarray) -> Tuple[float, float]:
"""
Minimization of the costs (potentially including sample weights) for
individual feature bins. The initial value for the parameters is set to
@@ -114,7 +120,7 @@ def optimization(self, y, yhat_others, weights):
res = minimize(self.objective_function, neutral_factor, args=(yhat_others, y, weights))
return res.x, self.uncertainty(y, weights)

- def objective_function(self, param, yhat_others, y, weights):
+ def objective_function(self, param: float, yhat_others: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
"""
Calculation of the in-sample costs (potentially including sample
weights) for individual feature bins according to a given loss
@@ -141,15 +147,15 @@ def objective_function(self, param, yhat_others, y, weights):
return self.costs(model, y, weights)

@abc.abstractmethod
- def costs(self, prediction, y, weights):
+ def costs(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
raise NotImplementedError("implement in subclass")

@abc.abstractmethod
- def model(self, param, yhat_others):
+ def model(self, param: float, yhat_others: np.ndarray) -> np.ndarray:
raise NotImplementedError("implement in subclass")

@abc.abstractmethod
- def uncertainty(self, y, weights):
+ def uncertainty(self, y: np.ndarray, weights: np.ndarray) -> float:
"""
Estimation of parameter uncertainty for a given feature bin.
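Taken together, these hooks define the per-bin fit: optimization minimizes objective_function, which composes model (combine a candidate parameter with the other features' predictions) with costs (score that combination). A self-contained sketch of the same pattern, with a toy multiplicative model and L2 costs that are illustrative only:

import numpy as np
from scipy.optimize import minimize

def model(param, yhat_others):
    # multiplicative combination; the neutral parameter is 1
    return param * yhat_others

def costs(prediction, y, weights):
    # toy weighted L2 costs, not one of the losses in this module
    return np.sum(weights * (y - prediction) ** 2) / np.sum(weights)

def objective_function(param, yhat_others, y, weights):
    return costs(model(param, yhat_others), y, weights)

y = np.array([2.0, 3.0, 5.0])
yhat_others = np.array([1.8, 2.9, 4.2])
weights = np.ones_like(y)
res = minimize(objective_function, 1.0, args=(yhat_others, y, weights))
print(res.x)  # best per-bin factor for this toy data, roughly 1.14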
@@ -222,7 +228,7 @@ def __init__(
def _check_y(self, y: np.ndarray) -> None:
check_y_multiplicative(y)

- def loss(self, prediction, y, weights):
+ def loss(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
"""
Calculation of the in-sample quantile loss, or to be exact costs,
(potentially including sample weights) after full feature cycles, i.e.,
@@ -244,18 +250,18 @@ def loss(self, prediction, y, weights):
"""
return quantile_costs(prediction, y, weights, self.quantile)

- def _init_global_scale(self, X, y):
+ def _init_global_scale(self, X: Union[pd.DataFrame, np.ndarray], y: np.ndarray) -> None:
self.global_scale_link_, self.prior_pred_link_offset_ = quantile_global_scale(
X, y, self.quantile, self.weights, self.prior_prediction_column, self.link_func
)

- def costs(self, prediction, y, weights):
+ def costs(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return quantile_costs(prediction, y, weights, self.quantile)

- def model(self, param, yhat_others):
+ def model(self, param: float, yhat_others: np.ndarray) -> np.ndarray:
return model_multiplicative(param, yhat_others)

- def uncertainty(self, y, weights):
+ def uncertainty(self, y: np.ndarray, weights: np.ndarray) -> float:
return uncertainty_gamma(y, weights)


@@ -312,7 +318,7 @@ def __init__(
def _check_y(self, y: np.ndarray) -> None:
check_y_additive(y)

- def loss(self, prediction, y, weights):
+ def loss(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
"""
Calculation of the in-sample quantile loss, or to be exact costs,
(potentially including sample weights) after full feature cycles, i.e.,
@@ -334,22 +340,22 @@ def loss(self, prediction, y, weights):
"""
return quantile_costs(prediction, y, weights, self.quantile)

- def _init_global_scale(self, X, y):
+ def _init_global_scale(self, X: Union[pd.DataFrame, np.ndarray], y: np.ndarray) -> None:
self.global_scale_link_, self.prior_pred_link_offset_ = quantile_global_scale(
X, y, self.quantile, self.weights, self.prior_prediction_column, self.link_func
)

- def costs(self, prediction, y, weights):
+ def costs(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return quantile_costs(prediction, y, weights, self.quantile)

- def model(self, param, yhat_others):
+ def model(self, param: float, yhat_others: np.ndarray) -> np.ndarray:
return model_additive(param, yhat_others)

- def uncertainty(self, y, weights):
+ def uncertainty(self, y: np.ndarray, weights: np.ndarray) -> float:
return uncertainty_gaussian(y, weights)


- def quantile_costs(prediction, y, weights, quantile):
+ def quantile_costs(prediction: np.ndarray, y: np.ndarray, weights: np.ndarray, quantile: float) -> float:
"""
Calculation of the in-sample quantile costs (potentially including sample
weights).
@@ -380,7 +386,14 @@ def quantile_costs(prediction, y, weights, quantile):
return sum_weighted_error / np.nansum(weights)
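The costs computed here are the weighted pinball (quantile) loss. A minimal NumPy sketch of that formula with invented numbers; the function above additionally guards against NaN weights via np.nansum:

import numpy as np

def pinball_costs(prediction, y, weights, quantile):
    diff = y - prediction
    # under-predictions weighted by quantile, over-predictions by (1 - quantile)
    loss = np.where(diff >= 0, quantile * diff, (quantile - 1.0) * diff)
    return np.sum(weights * loss) / np.sum(weights)

y = np.array([1.0, 2.0, 4.0])
prediction = np.array([1.5, 2.0, 3.0])
print(pinball_costs(prediction, y, np.ones(3), quantile=0.9))  # ~0.317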


- def quantile_global_scale(X, y, quantile, weights, prior_prediction_column, link_func):
+ def quantile_global_scale(
+     X: Union[pd.DataFrame, np.ndarray],
+     y: np.ndarray,
+     quantile: float,
+     weights: np.ndarray,
+     prior_prediction_column: Union[str, int, None],
+     link_func,
+ ) -> Tuple[float, float]:
"""
Calculation of the global scale for quantile regression, corresponding
to the (continuous approximation of the) respective quantile of the
@@ -423,15 +436,15 @@ def quantile_global_scale(X, y, quantile, weights, prior_prediction_column, link
return global_scale_link_, prior_pred_link_offset_
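In other words, the global scale is (a continuous approximation of) the requested quantile of the raw target, mapped into link space. A rough stand-in with plain NumPy; the implementation above uses continuous_quantile_from_discrete, imported at the top of the file, to refine this for discrete targets:

import numpy as np

y = np.array([0.0, 1.0, 1.0, 2.0, 3.0, 5.0, 8.0])
quantile = 0.8
global_scale = np.quantile(y, quantile)   # continuous approximation of the target quantile
global_scale_link = np.log(global_scale)  # e.g. for a log link in the multiplicative mode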


- def model_multiplicative(param, yhat_others):
+ def model_multiplicative(param: float, yhat_others: np.ndarray) -> np.ndarray:
return param * yhat_others


- def model_additive(param, yhat_others):
+ def model_additive(param: float, yhat_others: np.ndarray) -> np.ndarray:
return param + yhat_others


- def uncertainty_gamma(y, weights):
+ def uncertainty_gamma(y: np.ndarray, weights: np.ndarray) -> float:
# use moment-matching of a Gamma posterior with a log-normal
# distribution as approximation
alpha_prior = 2
@@ -440,15 +453,14 @@ def uncertainty_gamma(y, weights):
return sigma


- def uncertainty_gaussian(y, weights):
+ def uncertainty_gaussian(y: np.ndarray, weights: np.ndarray) -> float:
return np.sqrt(np.mean(y) / len(y))
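Each of these helpers condenses a feature bin into a single standard-deviation-like scalar that shrinks as the bin accumulates samples. For the Gaussian case above, for example:

import numpy as np

y_bin = np.array([2.0, 3.0, 4.0, 3.0])        # targets falling into one feature bin
sigma = np.sqrt(np.mean(y_bin) / len(y_bin))  # as in uncertainty_gaussian
print(sigma)  # ~0.866 for four samples with mean 3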


- def uncertainty_beta(y, weights, link_func):
+ def uncertainty_beta(y: np.ndarray, weights: np.ndarray, link_func) -> float:
# use moment-matching of a Beta posterior with a log-normal
# distribution as approximation
- alpha_prior = 1.001
- beta_prior = 1.001
+ alpha_prior, beta_prior = get_beta_priors()
alpha_posterior = np.sum(y) + alpha_prior
beta_posterior = np.sum(1 - y) + beta_prior
shift = 0.4 * (alpha_posterior / (alpha_posterior + beta_posterior) - 0.5)
@@ -528,19 +540,19 @@ def __init__(

self.costs = costs

- def loss(self, prediction, y, weights):
+ def loss(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return self.costs(prediction, y, weights)

def _check_y(self, y: np.ndarray) -> None:
check_y_multiplicative(y)

- def costs(self, prediction, y, weights):
+ def costs(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return self.costs(prediction, y, weights)

- def model(self, param, yhat_others):
+ def model(self, param: float, yhat_others: np.ndarray) -> np.ndarray:
return model_multiplicative(param, yhat_others)

- def uncertainty(self, y, weights):
+ def uncertainty(self, y: np.ndarray, weights: np.ndarray) -> float:
return uncertainty_gamma(y, weights)


@@ -591,19 +603,19 @@ def __init__(

self.costs = costs

- def loss(self, prediction, y, weights):
+ def loss(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return self.costs(prediction, y, weights)

def _check_y(self, y: np.ndarray) -> None:
check_y_additive(y)

- def costs(self, prediction, y, weights):
+ def costs(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return self.costs(prediction, y, weights)

- def model(self, param, yhat_others):
+ def model(self, param: float, yhat_others: np.ndarray) -> np.ndarray:
return model_additive(param, yhat_others)

- def uncertainty(self, y, weights):
+ def uncertainty(self, y: np.ndarray, weights: np.ndarray) -> float:
return uncertainty_gaussian(y, weights)


@@ -653,19 +665,19 @@ def __init__(

self.costs = costs

- def loss(self, prediction, y, weights):
+ def loss(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return self.costs(prediction, y, weights)

def _check_y(self, y: np.ndarray) -> None:
check_y_classification(y)

- def costs(self, prediction, y, weights):
+ def costs(self, prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
return self.costs(prediction, y, weights)

- def model(self, param, yhat_others):
+ def model(self, param: float, yhat_others: np.ndarray) -> np.ndarray:
return model_multiplicative(param, yhat_others)

- def uncertainty(self, y, weights):
+ def uncertainty(self, y: np.ndarray, weights: np.ndarray) -> float:
return uncertainty_beta(y, weights, self.link_func)


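The estimators at the end of this file (their class definitions are collapsed in this view) each store the user-supplied costs callable in __init__, so usage looks roughly like the sketch below. The class name CBMultiplicativeGenericCRegressor and the minimal constructor call are assumptions, not visible in this diff:

import numpy as np
from cyclic_boosting.generic_loss import CBMultiplicativeGenericCRegressor

def l2_costs(prediction: np.ndarray, y: np.ndarray, weights: np.ndarray) -> float:
    return float(np.sum(weights * (y - prediction) ** 2) / np.sum(weights))

# further constructor arguments (feature groups, feature properties, ...) omitted
est = CBMultiplicativeGenericCRegressor(costs=l2_costs)
# est.fit(X, y) then minimizes l2_costs bin by bin via the optimization hook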
2 changes: 1 addition & 1 deletion cyclic_boosting/regression.py
@@ -13,7 +13,7 @@
from cyclic_boosting.features import Feature
from cyclic_boosting.link import LogLinkMixin

- from typing import Tuple, Union
+ from typing import Tuple

_logger = logging.getLogger(__name__)
