diff --git a/docs/glossary.rst b/docs/glossary.rst index a9b4dc31b..14f349483 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -52,7 +52,7 @@ Glossary model called :term:`nanny model`. Concept Drift - A change in the underlying pattern (or mapping) between the :term:`Model Inputs` and the :term:`Target` (P(y|X)). + A change in the underlying pattern (or mapping) between the :term:`Model Inputs` and the :term:`Target` (P(Y|X)). Confidence Band When we estimate a statistic from a sample, our estimation has to take into account the variance of that statistic @@ -71,7 +71,8 @@ Glossary the actual *probability*. Regardless of the algorithm type, all classification models calculate some form of confidence scores. These scores are then thresholded to return the predicted class. Confidence scores can be turned into calibrated probabilities and used to estimate the performance of classification models in the absence - of ground truth, to learn more about this check out our :ref:`Confidence-based Performance Estimation Deep Dive`). + of ground truth, to learn more about this check out our + :ref:`Confidence-based Performance Estimation Deep Dive`. Confusion Matrix A confusion matrix is a table that is often used to describe the performance of a classification model (or @@ -81,10 +82,13 @@ Glossary For more information on the confusion matrix, see the `Wikipedia Confusion Matrix page`_. Covariate Shift - A change in joint distribution of :term:`Model Inputs`, :math:`P(\mathbf{X})`. + A change in the distribution of :term:`Model Inputs`, :math:`P(\mathbf{X})`. Note that under covariate shift + while the distribution of model inputs changes the conditional probability :math:`P(Y|\mathbf{X})` does not change. + The latter is called :term:`Concept Drift`. Data Drift - A synonym for :term:`Covariate Shift`. + A change in joint distribution of :term:`Model Inputs` and model :term:`targets`, denoted as + :math:`P(\mathbf{X}, Y)`. Data Chunk A data chunk is simply a sample of data. All the results generated by NannyML are calculated and presented on the diff --git a/nannyml/data_quality/missing/calculator.py b/nannyml/data_quality/missing/calculator.py index d9bf3513a..ad1e1b5a2 100644 --- a/nannyml/data_quality/missing/calculator.py +++ b/nannyml/data_quality/missing/calculator.py @@ -62,6 +62,9 @@ def __init__( Only one of `chunk_size`, `chunk_number` or `chunk_period` should be given. chunker : Chunker The `Chunker` used to split the data sets into a lists of chunks. + threshold: Threshold, default=StandardDeviationThreshold + The threshold you wish to evaluate values on. Defaults to a StandardDeviationThreshold with default + options. The other available value is ConstantThreshold. 
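As a usage-level complement to the `threshold` parameter documented above, a minimal sketch of switching the missing-values calculator from the default StandardDeviationThreshold to a ConstantThreshold. The ConstantThreshold argument names and the column names are assumptions to check against the installed nannyml version:

import nannyml as nml
from nannyml.thresholds import ConstantThreshold

reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()

# Alert when more than 10% of a column is missing; no lower alert.
calc = nml.MissingValuesCalculator(
    column_names=['salary_range', 'repaid_loan_on_prev_car'],
    normalize=True,  # report missing_values_rate instead of missing_values_count
    threshold=ConstantThreshold(lower=None, upper=0.10),
    chunk_size=5000,
)
calc.fit(reference_df)
results = calc.calculate(analysis_df)
print(results.filter(period='analysis').to_df().head())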
Examples @@ -102,14 +105,13 @@ def __init__( self._lower_alert_thresholds: Dict[str, Optional[float]] = {column_name: 0 for column_name in self.column_names} self.lower_threshold_value_limit: float = 0 - self.upper_threshold_value_limit: float + self.upper_threshold_value_limit: Optional[float] = None self.normalize = normalize if self.normalize: self.data_quality_metric = 'missing_values_rate' self.upper_threshold_value_limit = 1 else: self.data_quality_metric = 'missing_values_count' - self.upper_threshold_value_limit = np.nan def _calculate_missing_value_stats(self, data: pd.Series): count_tot = data.shape[0] @@ -217,8 +219,14 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st else: result['sampling_error'] = serr * np.sqrt(tot) - result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error'] - result['lower_confidence_boundary'] = result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'] + result['upper_confidence_boundary'] = np.minimum( + result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error'], + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit + ) + result['lower_confidence_boundary'] = np.maximum( + result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'], + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit + ) result['upper_threshold'] = self._upper_alert_thresholds[column_name] result['lower_threshold'] = self._lower_alert_thresholds[column_name] diff --git a/nannyml/data_quality/missing/result.py b/nannyml/data_quality/missing/result.py index 4bba51c6f..0f723e7cf 100644 --- a/nannyml/data_quality/missing/result.py +++ b/nannyml/data_quality/missing/result.py @@ -24,7 +24,10 @@ class Result(PerColumnResult, ResultCompareMixin): - """Contains the results of the univariate statistical drift calculation and provides plotting functionality.""" + """Missing Values Result Class. + + Contains calculation results and provides plotting functionality. + """ def __init__( self, @@ -34,13 +37,14 @@ def __init__( timestamp_column_name: Optional[str], chunker: Chunker, ): + """Initialize Missing Values Result Class.""" super().__init__(results_data, column_names) self.timestamp_column_name = timestamp_column_name self.data_quality_metric = data_quality_metric self.chunker = chunker - def keys(self) -> List[Key]: + def keys(self) -> List[Key]: # noqa: D102 return [ Key( properties=(column_name,), @@ -55,10 +59,7 @@ def plot( *args, **kwargs, ) -> go.Figure: - """ - - Parameters - ---------- + """Plot Missing Values results. Returns ------- @@ -82,7 +83,6 @@ def plot( ... res = res.filter(period='analysis', column_name=column_name).plot().show() """ - return plot_metrics( self, title='Data Quality ', diff --git a/nannyml/performance_calculation/calculator.py b/nannyml/performance_calculation/calculator.py index 294e6d8bb..26b9c483b 100644 --- a/nannyml/performance_calculation/calculator.py +++ b/nannyml/performance_calculation/calculator.py @@ -17,7 +17,6 @@ Examples -------- - >>> import nannyml as nml >>> from IPython.display import display >>> reference_df, analysis_df, analysis_targets_df = nml.load_synthetic_car_loan_dataset() @@ -113,6 +112,7 @@ def __init__( - 'regression' - 'classification_binary' - 'classification_multiclass' + y_pred_proba: ModelOutputsType, default=None Name(s) of the column(s) containing your model output. Pass a single string when there is only a single model output column, e.g. 
in binary classification cases. @@ -254,7 +254,7 @@ def __init__( self.result: Optional[Result] = None - def __str__(self): + def __str__(self): # noqa: D105 return f"PerformanceCalculator[metrics={str(self.metrics)}]" @log_usage(UsageEvent.PERFORMANCE_CALC_FIT, metadata_from_self=['metrics', 'problem_type']) diff --git a/nannyml/performance_calculation/metrics/binary_classification.py b/nannyml/performance_calculation/metrics/binary_classification.py index d302251d0..a3e6d1eb7 100644 --- a/nannyml/performance_calculation/metrics/binary_classification.py +++ b/nannyml/performance_calculation/metrics/binary_classification.py @@ -600,13 +600,11 @@ def __init__( ('False Positive', 'false_positive'), ('False Negative', 'false_negative'), ], + lower_threshold_limit=0 ) - self.lower_threshold_limit: Optional[float] = 0.0 if normalize_confusion_matrix else None - self.upper_threshold_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None - + self.upper_threshold_value_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix - # sampling error self._sampling_error_components: Tuple = () @@ -683,8 +681,8 @@ def _calculate_confusion_matrix_alert_thresholds( lower_threshold_value, upper_threshold_value = calculate_threshold_values( threshold=self.threshold, data=np.asarray(chunked_reference_metric), - lower_threshold_value_limit=self.lower_threshold_limit, - upper_threshold_value_limit=self.upper_threshold_limit, + lower_threshold_value_limit=self.lower_threshold_value_limit, + upper_threshold_value_limit=self.upper_threshold_value_limit, logger=self._logger, metric_name=self.display_name, ) diff --git a/nannyml/performance_calculation/metrics/multiclass_classification.py b/nannyml/performance_calculation/metrics/multiclass_classification.py index 0e0ec1d65..6179477ca 100644 --- a/nannyml/performance_calculation/metrics/multiclass_classification.py +++ b/nannyml/performance_calculation/metrics/multiclass_classification.py @@ -596,9 +596,11 @@ def __init__( threshold=threshold, y_pred_proba=y_pred_proba, components=[("None", "none")], + lower_threshold_limit=0 ) self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix + self.upper_threshold_value_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None self.classes: Optional[List[str]] = None diff --git a/nannyml/performance_calculation/result.py b/nannyml/performance_calculation/result.py index 62aa17278..9475dfe01 100644 --- a/nannyml/performance_calculation/result.py +++ b/nannyml/performance_calculation/result.py @@ -84,9 +84,7 @@ def __init__( self.analysis_data = analysis_data def keys(self) -> List[Key]: - """ - Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')` - """ + """Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')`.""" return [ Key( properties=(component[1],), @@ -108,6 +106,7 @@ def plot( **kwargs, ) -> go.Figure: """Render realized performance metrics. + This function will return a :class:`plotly.graph_objects.Figure` object. 
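Tying the documented `problem_type` and `y_pred_proba` arguments to the `plot()` return type, a hedged end-to-end sketch; the dataset loader follows the synthetic car loan example in this module's docstring, while the target column name and the merge step are assumptions to verify:

import nannyml as nml

reference_df, analysis_df, analysis_targets_df = nml.load_synthetic_car_loan_dataset()
analysis_df = analysis_df.merge(analysis_targets_df, left_index=True, right_index=True)

calc = nml.PerformanceCalculator(
    y_pred_proba='y_pred_proba',
    y_pred='y_pred',
    y_true='repaid',
    timestamp_column_name='timestamp',
    problem_type='classification_binary',
    metrics=['roc_auc', 'f1'],
    chunk_size=5000,
)
calc.fit(reference_df)
results = calc.calculate(analysis_df)

# plot() returns a plotly.graph_objects.Figure, so the usual Figure API applies.
fig = results.filter(period='analysis').plot()
fig.write_html('realized_performance.html')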
Parameters diff --git a/nannyml/performance_estimation/confidence_based/cbpe.py b/nannyml/performance_estimation/confidence_based/cbpe.py index 2bfa7287a..add5e5776 100644 --- a/nannyml/performance_estimation/confidence_based/cbpe.py +++ b/nannyml/performance_estimation/confidence_based/cbpe.py @@ -298,14 +298,6 @@ def __init__( self.result: Optional[Result] = None - def __deepcopy__(self, memodict={}): - cls = self.__class__ - result = cls.__new__(cls, y_pred_proba=self.y_pred_proba, problem_type=self.problem_type) - memodict[id(self)] = result - for k, v in self.__dict__.items(): - setattr(result, k, copy.deepcopy(v, memodict)) - return result - @log_usage(UsageEvent.CBPE_ESTIMATOR_FIT, metadata_from_self=['metrics', 'problem_type']) def _fit(self, reference_data: pd.DataFrame, *args, **kwargs) -> CBPE: """Fits the drift calculator using a set of reference data. diff --git a/nannyml/performance_estimation/confidence_based/metrics.py b/nannyml/performance_estimation/confidence_based/metrics.py index e0a56b49c..d81d73712 100644 --- a/nannyml/performance_estimation/confidence_based/metrics.py +++ b/nannyml/performance_estimation/confidence_based/metrics.py @@ -94,7 +94,6 @@ def __init__( Notes ----- - The `components` approach taken here is a quick fix to deal with metrics that return multiple values. Look at the `confusion_matrix` for example: a single metric produces 4 different result sets (containing values, thresholds, alerts, etc.). @@ -115,9 +114,6 @@ def __init__( self.uncalibrated_y_pred_proba = f'uncalibrated_{self.y_pred_proba}' - self.confidence_upper_bound: Optional[float] = 1.0 - self.confidence_lower_bound: Optional[float] = 0.0 - # A list of (display_name, column_name) tuples self.components: List[Tuple[str, str]] = components @@ -126,25 +122,25 @@ def _logger(self) -> logging.Logger: return logging.getLogger(__name__) @property - def display_name(self) -> str: + def display_name(self) -> str: # noqa: D102 return self.name @property - def column_name(self) -> str: + def column_name(self) -> str: # noqa: D102 return self.components[0][1] @property - def display_names(self): + def display_names(self): # noqa: D102 return [c[0] for c in self.components] @property - def column_names(self): + def column_names(self): # noqa: D102 return [c[1] for c in self.components] - def __str__(self): + def __str__(self): # noqa: D105 return self.display_name - def __repr__(self): + def __repr__(self): # noqa: D105 return self.column_name def fit(self, reference_data: pd.DataFrame): @@ -214,6 +210,19 @@ def alert(self, value: float) -> bool: ) def __eq__(self, other): + """Compares two Metric instances. + + They are considered equal when their components are equal. + + Parameters + ---------- + other: Metric + The other Metric instance you're comparing to. + + Returns + ------- + is_equal: bool + """ return self.components == other.components def _common_cleaning( @@ -242,6 +251,7 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: ---------- chunk_data : pd.DataFrame A pandas dataframe containing the data for a given chunk. 
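The confidence-boundary clamping that replaces the per-metric `confidence_lower_bound`/`confidence_upper_bound` attributes can be checked in isolation. A self-contained sketch of the same arithmetic; `SAMPLING_ERROR_RANGE = 3` is an assumption about the library constant, and nothing here is imported from nannyml:

import numpy as np

SAMPLING_ERROR_RANGE = 3  # assumed value of nannyml's constant

def clamp_confidence_band(value, sampling_error, lower_limit=None, upper_limit=None):
    """Clamp a value +/- SAMPLING_ERROR_RANGE * sampling_error band to optional metric limits."""
    upper = np.minimum(
        np.inf if upper_limit is None else upper_limit,
        value + SAMPLING_ERROR_RANGE * sampling_error,
    )
    lower = np.maximum(
        -np.inf if lower_limit is None else lower_limit,
        value - SAMPLING_ERROR_RANGE * sampling_error,
    )
    return lower, upper

# A rate-like metric (e.g. a normalized confusion matrix cell) stays within [0, 1] ...
print(clamp_confidence_band(0.98, 0.02, lower_limit=0, upper_limit=1))    # (0.92, 1.0)
# ... while an unbounded count is only clamped from below.
print(clamp_confidence_band(40.0, 5.0, lower_limit=0, upper_limit=None))  # (25.0, 55.0)

Passing None for a limit leaves that side of the band unclamped, which matches the unnormalized, count-based metrics in this change.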
+ Raises ------ NotImplementedError: occurs when a metric has multiple componets @@ -271,12 +281,12 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: chunk_record[f'realized_{column_name}'] = self._realized_performance(chunk_data) chunk_record[f'upper_confidence_boundary_{column_name}'] = np.minimum( - self.confidence_upper_bound or np.inf, + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, estimated_metric_value + SAMPLING_ERROR_RANGE * metric_estimate_sampling_error, ) chunk_record[f'lower_confidence_boundary_{column_name}'] = np.maximum( - self.confidence_lower_bound or -np.inf, + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_metric_value - SAMPLING_ERROR_RANGE * metric_estimate_sampling_error, ) @@ -299,6 +309,7 @@ def _logger(cls) -> logging.Logger: @classmethod def create(cls, key: str, use_case: ProblemType, **kwargs) -> Metric: + """Create new Metric.""" if kwargs is None: kwargs = {} @@ -324,6 +335,7 @@ def create(cls, key: str, use_case: ProblemType, **kwargs) -> Metric: @classmethod def register(cls, metric: str, use_case: ProblemType) -> Callable: + """Register a Metric in the MetricFactory registry.""" def inner_wrapper(wrapped_class: Type[Metric]) -> Type[Metric]: if metric in cls.registry: if use_case in cls.registry[metric]: @@ -338,6 +350,7 @@ def inner_wrapper(wrapped_class: Type[Metric]) -> Type[Metric]: @MetricFactory.register('roc_auc', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationAUROC(Metric): + """CBPE binary classification AUROC Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -348,6 +361,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification AUROC Metric Class.""" super().__init__( name='roc_auc', y_pred_proba=y_pred_proba, @@ -431,6 +445,7 @@ def estimate_roc_auc(y_pred_proba: pd.Series) -> float: @MetricFactory.register('f1', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationF1(Metric): + """CBPE binary classification f1 Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -441,6 +456,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification f1 Metric Class.""" super().__init__( name='f1', y_pred_proba=y_pred_proba, @@ -515,6 +531,7 @@ def estimate_f1(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> float: @MetricFactory.register('precision', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationPrecision(Metric): + """CBPE binary classification precision Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -525,6 +542,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification precision Metric Class.""" super().__init__( name='precision', y_pred_proba=y_pred_proba, @@ -599,6 +617,7 @@ def estimate_precision(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> floa @MetricFactory.register('recall', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationRecall(Metric): + """CBPE binary classification recall Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -609,6 +628,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification recall Metric Class.""" super().__init__( name='recall', y_pred_proba=y_pred_proba, @@ -682,6 +702,7 @@ def estimate_recall(y_pred: pd.DataFrame, y_pred_proba: 
pd.DataFrame) -> float: @MetricFactory.register('specificity', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationSpecificity(Metric): + """CBPE binary classification specificity Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -692,6 +713,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification specificity Metric Class.""" super().__init__( name='specificity', y_pred_proba=y_pred_proba, @@ -766,6 +788,7 @@ def estimate_specificity(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> fl @MetricFactory.register('accuracy', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationAccuracy(Metric): + """CBPE binary classification accuracy Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -776,6 +799,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification accuracy Metric Class.""" super().__init__( name='accuracy', y_pred_proba=y_pred_proba, @@ -831,6 +855,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('confusion_matrix', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationConfusionMatrix(Metric): + """CBPE binary classification confusion matrix Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -842,6 +867,7 @@ def __init__( normalize_confusion_matrix: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification confusion matrix Metric Class.""" super().__init__( name='confusion_matrix', y_pred_proba=y_pred_proba, @@ -860,14 +886,20 @@ def __init__( ) self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix - - self.true_positive_lower_threshold: Optional[float] = 0 - self.true_positive_upper_threshold: Optional[float] = 1 - self.true_negative_lower_threshold: Optional[float] = 0 - self.true_negative_upper_threshold: Optional[float] = 1 + if self.normalize_confusion_matrix is not None: + self.upper_threshold_value_limit = 1 + self.true_positive_lower_threshold: Optional[float] = None + self.true_positive_upper_threshold: Optional[float] = None + self.true_negative_lower_threshold: Optional[float] = None + self.true_negative_upper_threshold: Optional[float] = None + self.false_positive_lower_threshold: Optional[float] = None + self.false_positive_upper_threshold: Optional[float] = None + self.false_negative_lower_threshold: Optional[float] = None + self.false_negative_upper_threshold: Optional[float] = None def fit(self, reference_data: pd.DataFrame): # override the superclass fit method """Fits a Metric on reference data. 
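For the normalized confusion-matrix path touched above, a hedged CBPE sketch; the `normalize_confusion_matrix` values are assumed to mirror scikit-learn's 'true'/'pred'/'all' convention and should be confirmed against the nannyml docs:

import nannyml as nml

reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()

estimator = nml.CBPE(
    y_pred_proba='y_pred_proba',
    y_pred='y_pred',
    y_true='repaid',
    timestamp_column_name='timestamp',
    problem_type='classification_binary',
    metrics=['confusion_matrix'],
    normalize_confusion_matrix='all',
    chunk_size=5000,
)
estimator.fit(reference_df)
estimated = estimator.estimate(analysis_df)
estimated.plot().show()

With normalization enabled every estimated cell is a rate, so the shared `upper_threshold_value_limit` of 1 applies; without it the cells are raw counts and the upper side of the confidence band stays unclamped.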
+ Parameters ---------- reference_data: pd.DataFrame @@ -1281,18 +1313,14 @@ def get_true_pos_info(self, chunk_data: pd.DataFrame) -> Dict: true_pos_info['sampling_error_true_positive'] = sampling_error_true_positives true_pos_info['realized_true_positive'] = self._true_positive_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - true_pos_info['upper_confidence_boundary_true_positive'] = ( - estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives - ) - else: - true_pos_info['upper_confidence_boundary_true_positive'] = np.minimum( - self.confidence_upper_bound, - estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives, - ) + true_pos_info['upper_confidence_boundary_true_positive'] = np.minimum( + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, + estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives, + ) true_pos_info['lower_confidence_boundary_true_positive'] = np.maximum( - self.confidence_lower_bound, estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, + estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives ) true_pos_info['upper_threshold_true_positive'] = self.true_positive_upper_threshold @@ -1333,18 +1361,14 @@ def get_true_neg_info(self, chunk_data: pd.DataFrame) -> Dict: true_neg_info['sampling_error_true_negative'] = sampling_error_true_negatives true_neg_info['realized_true_negative'] = self._true_negative_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - true_neg_info['upper_confidence_boundary_true_negative'] = ( - estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives - ) - else: - true_neg_info['upper_confidence_boundary_true_negative'] = np.minimum( - self.confidence_upper_bound, - estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives, - ) + true_neg_info['upper_confidence_boundary_true_negative'] = np.minimum( + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, + estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives, + ) true_neg_info['lower_confidence_boundary_true_negative'] = np.maximum( - self.confidence_lower_bound, estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, + estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives ) true_neg_info['upper_threshold_true_negative'] = self.true_negative_upper_threshold @@ -1385,18 +1409,13 @@ def get_false_pos_info(self, chunk_data: pd.DataFrame) -> Dict: false_pos_info['sampling_error_false_positive'] = sampling_error_false_positives false_pos_info['realized_false_positive'] = self._false_positive_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - false_pos_info['upper_confidence_boundary_false_positive'] = ( - estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives - ) - else: - false_pos_info['upper_confidence_boundary_false_positive'] = np.minimum( - self.confidence_upper_bound, - estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives, - ) + false_pos_info['upper_confidence_boundary_false_positive'] = np.minimum( + np.inf if self.upper_threshold_value_limit is None else 
self.upper_threshold_value_limit, + estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives, + ) false_pos_info['lower_confidence_boundary_false_positive'] = np.maximum( - self.confidence_lower_bound, + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_false_positives - SAMPLING_ERROR_RANGE * sampling_error_false_positives, ) @@ -1438,18 +1457,13 @@ def get_false_neg_info(self, chunk_data: pd.DataFrame) -> Dict: false_neg_info['sampling_error_false_negative'] = sampling_error_false_negatives false_neg_info['realized_false_negative'] = self._false_negative_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - false_neg_info['upper_confidence_boundary_false_negative'] = ( - estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives - ) - else: - false_neg_info['upper_confidence_boundary_false_negative'] = np.minimum( - self.confidence_upper_bound, - estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives, - ) + false_neg_info['upper_confidence_boundary_false_negative'] = np.minimum( + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, + estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives, + ) false_neg_info['lower_confidence_boundary_false_negative'] = np.maximum( - self.confidence_lower_bound, + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_false_negatives - SAMPLING_ERROR_RANGE * sampling_error_false_negatives, ) @@ -1467,6 +1481,18 @@ def get_false_neg_info(self, chunk_data: pd.DataFrame) -> Dict: return false_neg_info def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: + """Returns a dictionary containing the performance metrics for a given chunk. + + Parameters + ---------- + chunk_data : pd.DataFrame + A pandas dataframe containing the data for a given chunk. + + Returns + ------- + chunk_record : Dict + A dictionary of perfomance metric, value pairs. + """ chunk_record = {} true_pos_info = self.get_true_pos_info(chunk_data) @@ -1495,6 +1521,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('business_value', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationBusinessValue(Metric): + """CBPE binary classification business value Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1507,6 +1534,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification business value Metric Class.""" super().__init__( name='business_value', y_pred_proba=y_pred_proba, @@ -1537,11 +1565,8 @@ def __init__( self.business_value_matrix = business_value_matrix self.normalize_business_value: Optional[str] = normalize_business_value - self.lower_threshold: Optional[float] = 0 - self.upper_threshold: Optional[float] = 1 - - self.confidence_upper_bound: Optional[float] = None - self.confidence_lower_bound: Optional[float] = None + # self.lower_threshold: Optional[float] = 0 + # self.upper_threshold: Optional[float] = 1 def _fit(self, reference_data: pd.DataFrame): self._sampling_error_components = bse.business_value_sampling_error_components( @@ -1607,15 +1632,19 @@ def estimate_business_value( normalize_business_value: str, default=None Determines how the business value will be normalized. Allowed values are None and 'per_prediction'. 
- - None - the business value will not be normalized and the value returned will be the total value per chunk. - - 'per_prediction' - the value will be normalized by the number of predictions in the chunk. + - None - the business value will not be normalized and the value returned will be the total value per chunk. + - 'per_prediction' - the value will be normalized by the number of predictions in the chunk. + + business_value_matrix: np.ndarray + A 2x2 matrix that specifies the value of each cell in the confusion matrix. + The format of the business value matrix must be specified as [[value_of_TN, value_of_FP], \ + [value_of_FN, value_of_TP]]. Returns ------- business_value: float Estimated Business Value score. """ - est_tn_ratio = np.mean(np.where(y_pred == 0, 1 - y_pred_proba, 0)) est_tp_ratio = np.mean(np.where(y_pred == 1, y_pred_proba, 0)) est_fp_ratio = np.mean(np.where(y_pred == 1, 1 - y_pred_proba, 0)) @@ -1683,6 +1712,7 @@ def _ensure_targets(self, data: pd.DataFrame) -> Optional[pd.DataFrame]: @MetricFactory.register('roc_auc', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationAUROC(_MulticlassClassificationMetric): + """CBPE multiclass classification AUROC Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1693,6 +1723,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification AUROC Metric Class.""" super().__init__( name='roc_auc', y_pred_proba=y_pred_proba, @@ -1747,6 +1778,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('f1', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationF1(_MulticlassClassificationMetric): + """CBPE multiclass classification f1 Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1757,6 +1789,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification f1 Metric Class.""" super().__init__( name='f1', y_pred_proba=y_pred_proba, @@ -1814,6 +1847,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('precision', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationPrecision(_MulticlassClassificationMetric): + """CBPE multiclass classification precision Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1824,6 +1858,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification precision Metric Class.""" super().__init__( name='precision', y_pred_proba=y_pred_proba, @@ -1881,6 +1916,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('recall', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationRecall(_MulticlassClassificationMetric): + """CBPE multiclass classification recall Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1891,6 +1927,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification recall Metric Class.""" super().__init__( name='recall', y_pred_proba=y_pred_proba, @@ -1948,6 +1985,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('specificity', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationSpecificity(_MulticlassClassificationMetric): + """CBPE multiclass classification specificity Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ 
-1958,6 +1996,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification specificity Metric Class.""" super().__init__( name='specificity', y_pred_proba=y_pred_proba, @@ -2019,6 +2058,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('accuracy', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationAccuracy(_MulticlassClassificationMetric): + """CBPE multiclass classification accuracy Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -2029,6 +2069,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification accuracy Metric Class.""" super().__init__( name='accuracy', y_pred_proba=y_pred_proba, @@ -2083,6 +2124,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('confusion_matrix', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationConfusionMatrix(Metric): + """CBPE multiclass classification confusion matrix Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -2094,6 +2136,7 @@ def __init__( normalize_confusion_matrix: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification confusion matrix Metric Class.""" if isinstance(y_pred_proba, str): raise ValueError( "y_pred_proba must be a dictionary with class labels as keys and pred_proba column names as values" @@ -2114,6 +2157,11 @@ def __init__( ) self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix + if self.normalize_confusion_matrix is None: + # overwrite default upper bound setting. + self.upper_threshold_value_limit = None + else: + self.upper_threshold_value_limit = 1 def _get_components(self, classes: List[str]) -> List[Tuple[str, str]]: components = [] @@ -2131,6 +2179,7 @@ def _get_components(self, classes: List[str]) -> List[Tuple[str, str]]: def fit(self, reference_data: pd.DataFrame): # override the superclass fit method """Fits a Metric on reference data. + Parameters ---------- reference_data: pd.DataFrame @@ -2243,6 +2292,18 @@ def _get_multiclass_confusion_matrix_estimate(self, chunk_data: pd.DataFrame) -> return normalized_est_confusion_matrix def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: + """Returns a dictionary containing the performance metrics for a given chunk. + + Parameters + ---------- + chunk_data : pd.DataFrame + A pandas dataframe containing the data for a given chunk. + + Returns + ------- + chunk_record : Dict + A dictionary of perfomance metric, value pairs. 
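Since the multiclass variant requires `y_pred_proba` as a mapping from class labels to predicted-probability columns, a hedged sketch of the expected call shape; the class labels and column names loosely follow nannyml's synthetic multiclass dataset and are illustrative:

import nannyml as nml

reference_df, analysis_df, _ = nml.load_synthetic_multiclass_classification_dataset()

# y_pred_proba must be a dict: class label -> predicted-probability column.
estimator = nml.CBPE(
    y_pred_proba={
        'prepaid_card': 'y_pred_proba_prepaid_card',
        'highstreet_card': 'y_pred_proba_highstreet_card',
        'upmarket_card': 'y_pred_proba_upmarket_card',
    },
    y_pred='y_pred',
    y_true='y_true',
    problem_type='classification_multiclass',
    metrics=['confusion_matrix'],
    normalize_confusion_matrix='true',  # estimated cells become rates in [0, 1]
    chunk_size=6000,
)
estimator.fit(reference_df)
results = estimator.estimate(analysis_df)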
+ """ chunk_record = {} estimated_cm = self._get_multiclass_confusion_matrix_estimate(chunk_data) @@ -2276,30 +2337,20 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: + SAMPLING_ERROR_RANGE * sampling_error[self.classes.index(true_class), self.classes.index(pred_class)] ) - - if self.normalize_confusion_matrix is None: - chunk_record[ - f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}' - ] = upper_confidence_boundary - else: - chunk_record[f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}'] = min( - self.confidence_upper_bound, upper_confidence_boundary - ) + chunk_record[f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}'] = min( + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, + upper_confidence_boundary + ) lower_confidence_boundary = ( estimated_cm[self.classes.index(true_class), self.classes.index(pred_class)] - SAMPLING_ERROR_RANGE * sampling_error[self.classes.index(true_class), self.classes.index(pred_class)] ) - - if self.normalize_confusion_matrix is None: - chunk_record[ - f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}' - ] = lower_confidence_boundary - else: - chunk_record[f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}'] = max( - self.confidence_lower_bound, lower_confidence_boundary - ) + chunk_record[f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}'] = max( + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, + lower_confidence_boundary + ) chunk_record[f'upper_threshold_true_{true_class}_pred_{pred_class}'] = self.alert_thresholds[ f'true_{true_class}_pred_{pred_class}' diff --git a/nannyml/performance_estimation/confidence_based/results.py b/nannyml/performance_estimation/confidence_based/results.py index 8de0ffd74..308b897d0 100644 --- a/nannyml/performance_estimation/confidence_based/results.py +++ b/nannyml/performance_estimation/confidence_based/results.py @@ -37,7 +37,8 @@ def __init__( problem_type: ProblemType, timestamp_column_name: Optional[str] = None, ): - """ + """Initialize CBPE results class. + Parameters ---------- results_data: pd.DataFrame @@ -124,9 +125,7 @@ def _get_metric_by_name(self, name: str) -> Optional[Metric]: return None def keys(self) -> List[Key]: - """ - Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')` - """ + """Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')`.""" return [ Key( properties=(component[1],), @@ -155,6 +154,8 @@ def plot( Parameters ---------- kind: str, default='performance' + What kind of plot to create. Only performance type is available. 
+ Raises ------ diff --git a/nannyml/stats/std/calculator.py b/nannyml/stats/std/calculator.py index 1f971fb87..4dbea7a80 100644 --- a/nannyml/stats/std/calculator.py +++ b/nannyml/stats/std/calculator.py @@ -2,7 +2,7 @@ # # License: Apache Software License 2.0 -"""Simple Statistics Average Calculator""" +"""Simple Statistics Standard Deviation Module.""" from typing import Any, Dict, List, Optional, Tuple, Union @@ -25,7 +25,7 @@ class SummaryStatsStdCalculator(AbstractCalculator): - """SummaryStatsStdCalculator implementation""" + """Simple Statistics Standard Deviation Calculator.""" def __init__( self, @@ -102,8 +102,8 @@ def __init__( self._upper_alert_thresholds: Dict[str, Optional[float]] = {column_name: 0 for column_name in self.column_names} self._lower_alert_thresholds: Dict[str, Optional[float]] = {column_name: 0 for column_name in self.column_names} - self.lower_threshold_value_limit: float = np.nan - self.upper_threshold_value_limit: float = np.nan + self.lower_threshold_value_limit: float = 0 + self.upper_threshold_value_limit: Optional[float] = None self.simple_stats_metric = 'values_std' @log_usage(UsageEvent.STATS_STD_FIT) @@ -203,7 +203,10 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st self._sampling_error_components[column_name], data[column_name] ) result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error'] - result['lower_confidence_boundary'] = result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'] + result['lower_confidence_boundary'] = np.maximum( + result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'], + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit + ) result['upper_threshold'] = self._upper_alert_thresholds[column_name] result['lower_threshold'] = self._lower_alert_thresholds[column_name] diff --git a/nannyml/stats/std/result.py b/nannyml/stats/std/result.py index 984abce94..ccd0ffe28 100644 --- a/nannyml/stats/std/result.py +++ b/nannyml/stats/std/result.py @@ -26,7 +26,7 @@ class Result(PerColumnResult, ResultCompareMixin): - """Contains the results of the univariate statistical drift calculation and provides plotting functionality.""" + """Summary Stats Standard Deviation Calculator Results object.""" def __init__( self, @@ -36,13 +36,14 @@ def __init__( timestamp_column_name: Optional[str], chunker: Chunker, ): + """Initalize Summary Stats Standard Deviation Calculator Results object.""" super().__init__(results_data, column_names) self.timestamp_column_name = timestamp_column_name self.simple_stats_metric = simple_stats_metric self.chunker = chunker - def keys(self) -> List[Key]: + def keys(self) -> List[Key]: # noqa: D102 return [ Key( properties=(column_name,), @@ -57,10 +58,7 @@ def plot( *args, **kwargs, ) -> go.Figure: - """ - - Parameters - ---------- + """Plot Summary Stats Standard Deviation Calculator Results. Returns ------- @@ -84,7 +82,6 @@ def plot( ... 
res = res.filter(period='analysis', column_name=column_name).plot().show() """ - return plot_metrics( self, title='Values Standard Deviation', diff --git a/tests/performance_estimation/CBPE/test_cbpe.py b/tests/performance_estimation/CBPE/test_cbpe.py index 8a6265f9b..270dc6012 100644 --- a/tests/performance_estimation/CBPE/test_cbpe.py +++ b/tests/performance_estimation/CBPE/test_cbpe.py @@ -360,11 +360,8 @@ def reduce_confidence_bounds(monkeypatch, estimator, results): new_upper_bound = max_confidence - 0.001 for metric in estimator.metrics: - monkeypatch.setattr(metric, 'confidence_lower_bound', new_lower_bound) - monkeypatch.setattr(metric, 'confidence_upper_bound', new_upper_bound) - - # monkeypatch.setattr(estimator, 'confidence_lower_bound', new_lower_bound) - # monkeypatch.setattr(estimator, 'confidence_upper_bound', new_upper_bound) + monkeypatch.setattr(metric, 'lower_threshold_value_limit', new_lower_bound) + monkeypatch.setattr(metric, 'upper_threshold_value_limit', new_upper_bound) return estimator, new_lower_bound, new_upper_bound
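Finally, a usage sketch of the renamed standard-deviation calculator; a standard deviation cannot be negative, which is why the lower confidence boundary is now clipped at the new `lower_threshold_value_limit` of 0. The top-level export name and the column names are assumptions to verify:

import nannyml as nml

reference_df, analysis_df, _ = nml.load_synthetic_car_loan_dataset()

calc = nml.SummaryStatsStdCalculator(
    column_names=['car_value', 'debt_to_income_ratio'],
    chunk_size=5000,
)
calc.fit(reference_df)
results = calc.calculate(analysis_df)

df = results.filter(period='analysis').to_df()
# Lower confidence boundaries are clipped at 0; the upper side stays unclamped.
print(df['car_value'][['value', 'lower_confidence_boundary', 'upper_confidence_boundary']])

results.plot().show()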