From 7286f4154371df2bcd8bf035efee4e60946afa38 Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Wed, 7 Feb 2024 16:25:12 +0200 Subject: [PATCH 01/10] glossary cov shift update --- docs/glossary.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/glossary.rst b/docs/glossary.rst index a9b4dc31b..14f349483 100644 --- a/docs/glossary.rst +++ b/docs/glossary.rst @@ -52,7 +52,7 @@ Glossary model called :term:`nanny model`. Concept Drift - A change in the underlying pattern (or mapping) between the :term:`Model Inputs` and the :term:`Target` (P(y|X)). + A change in the underlying pattern (or mapping) between the :term:`Model Inputs` and the :term:`Target` (P(Y|X)). Confidence Band When we estimate a statistic from a sample, our estimation has to take into account the variance of that statistic @@ -71,7 +71,8 @@ Glossary the actual *probability*. Regardless of the algorithm type, all classification models calculate some form of confidence scores. These scores are then thresholded to return the predicted class. Confidence scores can be turned into calibrated probabilities and used to estimate the performance of classification models in the absence - of ground truth, to learn more about this check out our :ref:`Confidence-based Performance Estimation Deep Dive`). + of ground truth, to learn more about this check out our + :ref:`Confidence-based Performance Estimation Deep Dive`. Confusion Matrix A confusion matrix is a table that is often used to describe the performance of a classification model (or @@ -81,10 +82,13 @@ Glossary For more information on the confusion matrix, see the `Wikipedia Confusion Matrix page`_. Covariate Shift - A change in joint distribution of :term:`Model Inputs`, :math:`P(\mathbf{X})`. + A change in the distribution of :term:`Model Inputs`, :math:`P(\mathbf{X})`. Note that under covariate shift + while the distribution of model inputs changes the conditional probability :math:`P(Y|\mathbf{X})` does not change. + The latter is called :term:`Concept Drift`. Data Drift - A synonym for :term:`Covariate Shift`. + A change in joint distribution of :term:`Model Inputs` and model :term:`targets`, denoted as + :math:`P(\mathbf{X}, Y)`. Data Chunk A data chunk is simply a sample of data. 
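(Illustrative aside on the three drift terms defined above: they are linked through the factorisation
:math:`P(\mathbf{X}, Y) = P(Y|\mathbf{X})\,P(\mathbf{X})`. Covariate shift changes only the
:math:`P(\mathbf{X})` factor while :math:`P(Y|\mathbf{X})` stays fixed, concept drift changes only
:math:`P(Y|\mathbf{X})`, and data drift is a change in the joint distribution, i.e. a change in
either factor.)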
All the results generated by NannyML are calculated and presented on the From 0ce138319f84831a37a15006fe0a5e9adc938628 Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Wed, 7 Feb 2024 22:03:33 +0200 Subject: [PATCH 02/10] remove duplicate confidence upper bound functionality --- .../confidence_based/metrics.py | 54 ++++++++++--------- .../performance_estimation/CBPE/test_cbpe.py | 7 +-- 2 files changed, 31 insertions(+), 30 deletions(-) diff --git a/nannyml/performance_estimation/confidence_based/metrics.py b/nannyml/performance_estimation/confidence_based/metrics.py index e0a56b49c..b92e0d3a4 100644 --- a/nannyml/performance_estimation/confidence_based/metrics.py +++ b/nannyml/performance_estimation/confidence_based/metrics.py @@ -115,9 +115,6 @@ def __init__( self.uncalibrated_y_pred_proba = f'uncalibrated_{self.y_pred_proba}' - self.confidence_upper_bound: Optional[float] = 1.0 - self.confidence_lower_bound: Optional[float] = 0.0 - # A list of (display_name, column_name) tuples self.components: List[Tuple[str, str]] = components @@ -271,12 +268,12 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: chunk_record[f'realized_{column_name}'] = self._realized_performance(chunk_data) chunk_record[f'upper_confidence_boundary_{column_name}'] = np.minimum( - self.confidence_upper_bound or np.inf, + self.upper_threshold_value_limit or np.inf, estimated_metric_value + SAMPLING_ERROR_RANGE * metric_estimate_sampling_error, ) chunk_record[f'lower_confidence_boundary_{column_name}'] = np.maximum( - self.confidence_lower_bound or -np.inf, + self.lower_threshold_value_limit or -np.inf, estimated_metric_value - SAMPLING_ERROR_RANGE * metric_estimate_sampling_error, ) @@ -860,11 +857,16 @@ def __init__( ) self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix - - self.true_positive_lower_threshold: Optional[float] = 0 - self.true_positive_upper_threshold: Optional[float] = 1 - self.true_negative_lower_threshold: Optional[float] = 0 - self.true_negative_upper_threshold: Optional[float] = 1 + if self.normalize_confusion_matrix is not None: + self.upper_threshold_value_limit = 1 + self.true_positive_lower_threshold: Optional[float] = None + self.true_positive_upper_threshold: Optional[float] = None + self.true_negative_lower_threshold: Optional[float] = None + self.true_negative_upper_threshold: Optional[float] = None + self.false_positive_lower_threshold: Optional[float] = None + self.false_positive_upper_threshold: Optional[float] = None + self.false_negative_lower_threshold: Optional[float] = None + self.false_negative_upper_threshold: Optional[float] = None def fit(self, reference_data: pd.DataFrame): # override the superclass fit method """Fits a Metric on reference data. 
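# Illustrative sketch of the confidence-boundary logic in get_chunk_record above: the band is the
# estimated metric value +/- SAMPLING_ERROR_RANGE times the sampling error, clipped to the metric's
# value limits, where a limit of None means "unbounded". The helper name and the value 3 for
# SAMPLING_ERROR_RANGE are assumptions for illustration, not code from this patch; an explicit
# None check is used so that a limit of 0 stays meaningful, as the later commits in this series do.
import numpy as np

SAMPLING_ERROR_RANGE = 3  # assumed multiplier

def confidence_band(estimate, sampling_error, lower_limit=None, upper_limit=None):
    # clip the upper boundary to the metric's upper value limit (if any)
    upper = np.minimum(np.inf if upper_limit is None else upper_limit,
                       estimate + SAMPLING_ERROR_RANGE * sampling_error)
    # clip the lower boundary to the metric's lower value limit (if any)
    lower = np.maximum(-np.inf if lower_limit is None else lower_limit,
                       estimate - SAMPLING_ERROR_RANGE * sampling_error)
    return float(lower), float(upper)

# An estimated ROC AUC of 0.97 with sampling error 0.02 gets its upper boundary capped at 1.0:
print(confidence_band(0.97, 0.02, lower_limit=0.0, upper_limit=1.0))  # (0.91, 1.0)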
@@ -1287,12 +1289,12 @@ def get_true_pos_info(self, chunk_data: pd.DataFrame) -> Dict: ) else: true_pos_info['upper_confidence_boundary_true_positive'] = np.minimum( - self.confidence_upper_bound, + self.upper_threshold_value_limit, estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives, ) true_pos_info['lower_confidence_boundary_true_positive'] = np.maximum( - self.confidence_lower_bound, estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives + self.lower_threshold_value_limit, estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives ) true_pos_info['upper_threshold_true_positive'] = self.true_positive_upper_threshold @@ -1339,12 +1341,12 @@ def get_true_neg_info(self, chunk_data: pd.DataFrame) -> Dict: ) else: true_neg_info['upper_confidence_boundary_true_negative'] = np.minimum( - self.confidence_upper_bound, + self.upper_threshold_value_limit, estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives, ) true_neg_info['lower_confidence_boundary_true_negative'] = np.maximum( - self.confidence_lower_bound, estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives + self.lower_threshold_value_limit, estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives ) true_neg_info['upper_threshold_true_negative'] = self.true_negative_upper_threshold @@ -1391,12 +1393,12 @@ def get_false_pos_info(self, chunk_data: pd.DataFrame) -> Dict: ) else: false_pos_info['upper_confidence_boundary_false_positive'] = np.minimum( - self.confidence_upper_bound, + self.upper_threshold_value_limit, estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives, ) false_pos_info['lower_confidence_boundary_false_positive'] = np.maximum( - self.confidence_lower_bound, + self.lower_threshold_value_limit, estimated_false_positives - SAMPLING_ERROR_RANGE * sampling_error_false_positives, ) @@ -1444,12 +1446,12 @@ def get_false_neg_info(self, chunk_data: pd.DataFrame) -> Dict: ) else: false_neg_info['upper_confidence_boundary_false_negative'] = np.minimum( - self.confidence_upper_bound, + self.upper_threshold_value_limit, estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives, ) false_neg_info['lower_confidence_boundary_false_negative'] = np.maximum( - self.confidence_lower_bound, + self.lower_threshold_value_limit, estimated_false_negatives - SAMPLING_ERROR_RANGE * sampling_error_false_negatives, ) @@ -1537,11 +1539,8 @@ def __init__( self.business_value_matrix = business_value_matrix self.normalize_business_value: Optional[str] = normalize_business_value - self.lower_threshold: Optional[float] = 0 - self.upper_threshold: Optional[float] = 1 - - self.confidence_upper_bound: Optional[float] = None - self.confidence_lower_bound: Optional[float] = None + # self.lower_threshold: Optional[float] = 0 + # self.upper_threshold: Optional[float] = 1 def _fit(self, reference_data: pd.DataFrame): self._sampling_error_components = bse.business_value_sampling_error_components( @@ -2114,6 +2113,11 @@ def __init__( ) self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix + if self.normalize_confusion_matrix is None: + # overwrite default upper bound setting. 
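# Illustrative sketch (with made-up example data) of why the confusion-matrix metrics in this patch
# only get an upper value limit of 1 when normalize_confusion_matrix is set: normalized cells are
# rates in [0, 1], while raw cells are counts whose only natural limit is the lower bound of 0.
from sklearn.metrics import confusion_matrix

y_true = [0, 0, 1, 1, 1, 0, 1, 0]
y_pred = [0, 1, 1, 1, 0, 0, 1, 0]

print(confusion_matrix(y_true, y_pred))                    # raw counts: [[3 1], [1 3]]
print(confusion_matrix(y_true, y_pred, normalize='true'))  # per-row rates, every cell in [0, 1]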
+ self.upper_threshold_value_limit = None + else: + self.upper_threshold_value_limit = 1 def _get_components(self, classes: List[str]) -> List[Tuple[str, str]]: components = [] @@ -2283,7 +2287,7 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: ] = upper_confidence_boundary else: chunk_record[f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}'] = min( - self.confidence_upper_bound, upper_confidence_boundary + self.upper_threshold_value_limit, upper_confidence_boundary ) lower_confidence_boundary = ( @@ -2298,7 +2302,7 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: ] = lower_confidence_boundary else: chunk_record[f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}'] = max( - self.confidence_lower_bound, lower_confidence_boundary + self.lower_threshold_value_limit, lower_confidence_boundary ) chunk_record[f'upper_threshold_true_{true_class}_pred_{pred_class}'] = self.alert_thresholds[ diff --git a/tests/performance_estimation/CBPE/test_cbpe.py b/tests/performance_estimation/CBPE/test_cbpe.py index 8a6265f9b..270dc6012 100644 --- a/tests/performance_estimation/CBPE/test_cbpe.py +++ b/tests/performance_estimation/CBPE/test_cbpe.py @@ -360,11 +360,8 @@ def reduce_confidence_bounds(monkeypatch, estimator, results): new_upper_bound = max_confidence - 0.001 for metric in estimator.metrics: - monkeypatch.setattr(metric, 'confidence_lower_bound', new_lower_bound) - monkeypatch.setattr(metric, 'confidence_upper_bound', new_upper_bound) - - # monkeypatch.setattr(estimator, 'confidence_lower_bound', new_lower_bound) - # monkeypatch.setattr(estimator, 'confidence_upper_bound', new_upper_bound) + monkeypatch.setattr(metric, 'lower_threshold_value_limit', new_lower_bound) + monkeypatch.setattr(metric, 'upper_threshold_value_limit', new_upper_bound) return estimator, new_lower_bound, new_upper_bound From fc6d2537c73a58d24f83a3025673e8f4b846b8fd Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Thu, 8 Feb 2024 12:43:30 +0200 Subject: [PATCH 03/10] mypy and flake8 cbpe fixes --- .../confidence_based/cbpe.py | 8 - .../confidence_based/metrics.py | 157 ++++++++++++------ .../confidence_based/results.py | 9 +- 3 files changed, 112 insertions(+), 62 deletions(-) diff --git a/nannyml/performance_estimation/confidence_based/cbpe.py b/nannyml/performance_estimation/confidence_based/cbpe.py index 2bfa7287a..add5e5776 100644 --- a/nannyml/performance_estimation/confidence_based/cbpe.py +++ b/nannyml/performance_estimation/confidence_based/cbpe.py @@ -298,14 +298,6 @@ def __init__( self.result: Optional[Result] = None - def __deepcopy__(self, memodict={}): - cls = self.__class__ - result = cls.__new__(cls, y_pred_proba=self.y_pred_proba, problem_type=self.problem_type) - memodict[id(self)] = result - for k, v in self.__dict__.items(): - setattr(result, k, copy.deepcopy(v, memodict)) - return result - @log_usage(UsageEvent.CBPE_ESTIMATOR_FIT, metadata_from_self=['metrics', 'problem_type']) def _fit(self, reference_data: pd.DataFrame, *args, **kwargs) -> CBPE: """Fits the drift calculator using a set of reference data. diff --git a/nannyml/performance_estimation/confidence_based/metrics.py b/nannyml/performance_estimation/confidence_based/metrics.py index b92e0d3a4..f6663edcc 100644 --- a/nannyml/performance_estimation/confidence_based/metrics.py +++ b/nannyml/performance_estimation/confidence_based/metrics.py @@ -94,7 +94,6 @@ def __init__( Notes ----- - The `components` approach taken here is a quick fix to deal with metrics that return multiple values. 
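# A minimal, generic sketch of the registry pattern behind MetricFactory.register and
# MetricFactory.create defined further down in this module: a decorator stores each Metric class
# under a (name, use case) key so that instances can later be created from plain strings.
# The class and variable names below are invented for illustration only.
from typing import Callable, Dict, Type


class _Factory:
    registry: Dict[str, Dict[str, Type]] = {}

    @classmethod
    def register(cls, name: str, use_case: str) -> Callable:
        def wrapper(wrapped_class: Type) -> Type:
            cls.registry.setdefault(name, {})[use_case] = wrapped_class
            return wrapped_class
        return wrapper

    @classmethod
    def create(cls, name: str, use_case: str, **kwargs):
        return cls.registry[name][use_case](**kwargs)


@_Factory.register('roc_auc', 'classification_binary')
class _DummyAUROC:
    def __init__(self, **kwargs):
        self.kwargs = kwargs


metric = _Factory.create('roc_auc', 'classification_binary')  # returns a _DummyAUROC instance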
Look at the `confusion_matrix` for example: a single metric produces 4 different result sets (containing values, thresholds, alerts, etc.). @@ -123,25 +122,25 @@ def _logger(self) -> logging.Logger: return logging.getLogger(__name__) @property - def display_name(self) -> str: + def display_name(self) -> str: # noqa: D102 return self.name @property - def column_name(self) -> str: + def column_name(self) -> str: # noqa: D102 return self.components[0][1] @property - def display_names(self): + def display_names(self): # noqa: D102 return [c[0] for c in self.components] @property - def column_names(self): + def column_names(self): # noqa: D102 return [c[1] for c in self.components] - def __str__(self): + def __str__(self): # noqa: D105 return self.display_name - def __repr__(self): + def __repr__(self): # noqa: D105 return self.column_name def fit(self, reference_data: pd.DataFrame): @@ -211,6 +210,19 @@ def alert(self, value: float) -> bool: ) def __eq__(self, other): + """Compares two Metric instances. + + They are considered equal when their components are equal. + + Parameters + ---------- + other: Metric + The other Metric instance you're comparing to. + + Returns + ------- + is_equal: bool + """ return self.components == other.components def _common_cleaning( @@ -239,6 +251,7 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: ---------- chunk_data : pd.DataFrame A pandas dataframe containing the data for a given chunk. + Raises ------ NotImplementedError: occurs when a metric has multiple componets @@ -296,6 +309,7 @@ def _logger(cls) -> logging.Logger: @classmethod def create(cls, key: str, use_case: ProblemType, **kwargs) -> Metric: + """Create new Metric.""" if kwargs is None: kwargs = {} @@ -321,6 +335,7 @@ def create(cls, key: str, use_case: ProblemType, **kwargs) -> Metric: @classmethod def register(cls, metric: str, use_case: ProblemType) -> Callable: + """Register a Metric in the MetricFactory registry.""" def inner_wrapper(wrapped_class: Type[Metric]) -> Type[Metric]: if metric in cls.registry: if use_case in cls.registry[metric]: @@ -335,6 +350,7 @@ def inner_wrapper(wrapped_class: Type[Metric]) -> Type[Metric]: @MetricFactory.register('roc_auc', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationAUROC(Metric): + """CBPE binary classification AUROC Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -345,6 +361,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification AUROC Metric Class.""" super().__init__( name='roc_auc', y_pred_proba=y_pred_proba, @@ -428,6 +445,7 @@ def estimate_roc_auc(y_pred_proba: pd.Series) -> float: @MetricFactory.register('f1', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationF1(Metric): + """CBPE binary classification f1 Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -438,6 +456,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification f1 Metric Class.""" super().__init__( name='f1', y_pred_proba=y_pred_proba, @@ -512,6 +531,7 @@ def estimate_f1(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> float: @MetricFactory.register('precision', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationPrecision(Metric): + """CBPE binary classification precision Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -522,6 +542,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary 
classification precision Metric Class.""" super().__init__( name='precision', y_pred_proba=y_pred_proba, @@ -596,6 +617,7 @@ def estimate_precision(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> floa @MetricFactory.register('recall', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationRecall(Metric): + """CBPE binary classification recall Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -606,6 +628,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification recall Metric Class.""" super().__init__( name='recall', y_pred_proba=y_pred_proba, @@ -679,6 +702,7 @@ def estimate_recall(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> float: @MetricFactory.register('specificity', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationSpecificity(Metric): + """CBPE binary classification specificity Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -689,6 +713,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification specificity Metric Class.""" super().__init__( name='specificity', y_pred_proba=y_pred_proba, @@ -763,6 +788,7 @@ def estimate_specificity(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> fl @MetricFactory.register('accuracy', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationAccuracy(Metric): + """CBPE binary classification accuracy Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -773,6 +799,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification accuracy Metric Class.""" super().__init__( name='accuracy', y_pred_proba=y_pred_proba, @@ -828,6 +855,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('confusion_matrix', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationConfusionMatrix(Metric): + """CBPE binary classification confusion matrix Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -839,6 +867,7 @@ def __init__( normalize_confusion_matrix: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification confusion matrix Metric Class.""" super().__init__( name='confusion_matrix', y_pred_proba=y_pred_proba, @@ -870,6 +899,7 @@ def __init__( def fit(self, reference_data: pd.DataFrame): # override the superclass fit method """Fits a Metric on reference data. 
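# Simplified sketch of the estimation idea behind the estimate_* helpers above in this module
# (estimate_f1, estimate_precision, estimate_recall, ...): given calibrated scores, the expected
# value of a metric can be computed without ground truth by treating each calibrated score as the
# probability that the corresponding prediction is correct. This is a toy illustration of the
# concept with made-up data, not the exact NannyML implementation.
import numpy as np

y_pred_proba = np.array([0.9, 0.8, 0.3, 0.2, 0.7])  # calibrated P(y=1 | x)
y_pred = (y_pred_proba >= 0.5).astype(int)           # thresholded predictions

# expected accuracy: probability that each prediction is correct, averaged over the chunk
expected_correct = np.where(y_pred == 1, y_pred_proba, 1 - y_pred_proba)
estimated_accuracy = expected_correct.mean()
print(estimated_accuracy)  # 0.78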
+ Parameters ---------- reference_data: pd.DataFrame @@ -1283,18 +1313,14 @@ def get_true_pos_info(self, chunk_data: pd.DataFrame) -> Dict: true_pos_info['sampling_error_true_positive'] = sampling_error_true_positives true_pos_info['realized_true_positive'] = self._true_positive_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - true_pos_info['upper_confidence_boundary_true_positive'] = ( - estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives - ) - else: - true_pos_info['upper_confidence_boundary_true_positive'] = np.minimum( - self.upper_threshold_value_limit, - estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives, - ) + true_pos_info['upper_confidence_boundary_true_positive'] = np.minimum( + self.upper_threshold_value_limit or np.inf, + estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives, + ) true_pos_info['lower_confidence_boundary_true_positive'] = np.maximum( - self.lower_threshold_value_limit, estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives + self.lower_threshold_value_limit or -np.inf, + estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives ) true_pos_info['upper_threshold_true_positive'] = self.true_positive_upper_threshold @@ -1335,18 +1361,14 @@ def get_true_neg_info(self, chunk_data: pd.DataFrame) -> Dict: true_neg_info['sampling_error_true_negative'] = sampling_error_true_negatives true_neg_info['realized_true_negative'] = self._true_negative_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - true_neg_info['upper_confidence_boundary_true_negative'] = ( - estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives - ) - else: - true_neg_info['upper_confidence_boundary_true_negative'] = np.minimum( - self.upper_threshold_value_limit, - estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives, - ) + true_neg_info['upper_confidence_boundary_true_negative'] = np.minimum( + self.upper_threshold_value_limit or np.inf, + estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives, + ) true_neg_info['lower_confidence_boundary_true_negative'] = np.maximum( - self.lower_threshold_value_limit, estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives + self.lower_threshold_value_limit or -np.inf, + estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives ) true_neg_info['upper_threshold_true_negative'] = self.true_negative_upper_threshold @@ -1387,18 +1409,13 @@ def get_false_pos_info(self, chunk_data: pd.DataFrame) -> Dict: false_pos_info['sampling_error_false_positive'] = sampling_error_false_positives false_pos_info['realized_false_positive'] = self._false_positive_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - false_pos_info['upper_confidence_boundary_false_positive'] = ( - estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives - ) - else: - false_pos_info['upper_confidence_boundary_false_positive'] = np.minimum( - self.upper_threshold_value_limit, - estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives, - ) + false_pos_info['upper_confidence_boundary_false_positive'] = np.minimum( + self.upper_threshold_value_limit or np.inf, + estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives, + ) false_pos_info['lower_confidence_boundary_false_positive'] = np.maximum( - 
self.lower_threshold_value_limit, + self.lower_threshold_value_limit or -np.inf, estimated_false_positives - SAMPLING_ERROR_RANGE * sampling_error_false_positives, ) @@ -1440,18 +1457,13 @@ def get_false_neg_info(self, chunk_data: pd.DataFrame) -> Dict: false_neg_info['sampling_error_false_negative'] = sampling_error_false_negatives false_neg_info['realized_false_negative'] = self._false_negative_realized_performance(chunk_data) - if self.normalize_confusion_matrix is None: - false_neg_info['upper_confidence_boundary_false_negative'] = ( - estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives - ) - else: - false_neg_info['upper_confidence_boundary_false_negative'] = np.minimum( - self.upper_threshold_value_limit, - estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives, - ) + false_neg_info['upper_confidence_boundary_false_negative'] = np.minimum( + self.upper_threshold_value_limit or np.inf, + estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives, + ) false_neg_info['lower_confidence_boundary_false_negative'] = np.maximum( - self.lower_threshold_value_limit, + self.lower_threshold_value_limit or -np.inf, estimated_false_negatives - SAMPLING_ERROR_RANGE * sampling_error_false_negatives, ) @@ -1469,6 +1481,18 @@ def get_false_neg_info(self, chunk_data: pd.DataFrame) -> Dict: return false_neg_info def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: + """Returns a dictionary containing the performance metrics for a given chunk. + + Parameters + ---------- + chunk_data : pd.DataFrame + A pandas dataframe containing the data for a given chunk. + + Returns + ------- + chunk_record : Dict + A dictionary of perfomance metric, value pairs. + """ chunk_record = {} true_pos_info = self.get_true_pos_info(chunk_data) @@ -1497,6 +1521,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('business_value', ProblemType.CLASSIFICATION_BINARY) class BinaryClassificationBusinessValue(Metric): + """CBPE binary classification business value Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1509,6 +1534,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE binary classification business value Metric Class.""" super().__init__( name='business_value', y_pred_proba=y_pred_proba, @@ -1606,15 +1632,19 @@ def estimate_business_value( normalize_business_value: str, default=None Determines how the business value will be normalized. Allowed values are None and 'per_prediction'. - - None - the business value will not be normalized and the value returned will be the total value per chunk. - - 'per_prediction' - the value will be normalized by the number of predictions in the chunk. + - None - the business value will not be normalized and the value returned will be the total value per chunk. + - 'per_prediction' - the value will be normalized by the number of predictions in the chunk. + + business_value_matrix: np.ndarray + A 2x2 matrix that specifies the value of each cell in the confusion matrix. + The format of the business value matrix must be specified as [[value_of_TN, value_of_FP], \ + [value_of_FN, value_of_TP]]. Returns ------- business_value: float Estimated Business Value score. 
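# Worked sketch of estimate_business_value: the expected rate of each confusion-matrix cell is
# derived from the calibrated scores (using the same np.where expressions as the function body)
# and then weighted by the business value matrix [[value_of_TN, value_of_FP], [value_of_FN,
# value_of_TP]]. Example data and variable names are made up for illustration.
import numpy as np

y_pred_proba = np.array([0.9, 0.8, 0.3, 0.2])           # calibrated P(y=1 | x)
y_pred = np.array([1, 1, 0, 0])
business_value_matrix = np.array([[0, -5], [-10, 20]])  # [[TN, FP], [FN, TP]] values

est_tn_ratio = np.mean(np.where(y_pred == 0, 1 - y_pred_proba, 0))
est_tp_ratio = np.mean(np.where(y_pred == 1, y_pred_proba, 0))
est_fp_ratio = np.mean(np.where(y_pred == 1, 1 - y_pred_proba, 0))
est_fn_ratio = np.mean(np.where(y_pred == 0, y_pred_proba, 0))

expected_rates = np.array([[est_tn_ratio, est_fp_ratio], [est_fn_ratio, est_tp_ratio]])
# un-normalized: total expected value for the chunk; divide by len(y_pred) for 'per_prediction'
total_value = (expected_rates * business_value_matrix).sum() * len(y_pred)
print(total_value)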
""" - est_tn_ratio = np.mean(np.where(y_pred == 0, 1 - y_pred_proba, 0)) est_tp_ratio = np.mean(np.where(y_pred == 1, y_pred_proba, 0)) est_fp_ratio = np.mean(np.where(y_pred == 1, 1 - y_pred_proba, 0)) @@ -1682,6 +1712,7 @@ def _ensure_targets(self, data: pd.DataFrame) -> Optional[pd.DataFrame]: @MetricFactory.register('roc_auc', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationAUROC(_MulticlassClassificationMetric): + """CBPE multiclass classification AUROC Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1692,6 +1723,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification AUROC Metric Class.""" super().__init__( name='roc_auc', y_pred_proba=y_pred_proba, @@ -1746,6 +1778,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('f1', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationF1(_MulticlassClassificationMetric): + """CBPE multiclass classification f1 Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1756,6 +1789,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification f1 Metric Class.""" super().__init__( name='f1', y_pred_proba=y_pred_proba, @@ -1813,6 +1847,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('precision', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationPrecision(_MulticlassClassificationMetric): + """CBPE multiclass classification precision Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1823,6 +1858,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification precision Metric Class.""" super().__init__( name='precision', y_pred_proba=y_pred_proba, @@ -1880,6 +1916,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('recall', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationRecall(_MulticlassClassificationMetric): + """CBPE multiclass classification recall Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1890,6 +1927,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification recall Metric Class.""" super().__init__( name='recall', y_pred_proba=y_pred_proba, @@ -1947,6 +1985,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('specificity', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationSpecificity(_MulticlassClassificationMetric): + """CBPE multiclass classification specificity Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -1957,6 +1996,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification specificity Metric Class.""" super().__init__( name='specificity', y_pred_proba=y_pred_proba, @@ -2018,6 +2058,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('accuracy', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationAccuracy(_MulticlassClassificationMetric): + """CBPE multiclass classification accuracy Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -2028,6 +2069,7 @@ def __init__( timestamp_column_name: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification accuracy 
Metric Class.""" super().__init__( name='accuracy', y_pred_proba=y_pred_proba, @@ -2082,6 +2124,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float: @MetricFactory.register('confusion_matrix', ProblemType.CLASSIFICATION_MULTICLASS) class MulticlassClassificationConfusionMatrix(Metric): + """CBPE multiclass classification confusion matrix Metric Class.""" def __init__( self, y_pred_proba: ModelOutputsType, @@ -2093,6 +2136,7 @@ def __init__( normalize_confusion_matrix: Optional[str] = None, **kwargs, ): + """Initialize CBPE multiclass classification confusion matrix Metric Class.""" if isinstance(y_pred_proba, str): raise ValueError( "y_pred_proba must be a dictionary with class labels as keys and pred_proba column names as values" @@ -2135,6 +2179,7 @@ def _get_components(self, classes: List[str]) -> List[Tuple[str, str]]: def fit(self, reference_data: pd.DataFrame): # override the superclass fit method """Fits a Metric on reference data. + Parameters ---------- reference_data: pd.DataFrame @@ -2247,6 +2292,18 @@ def _get_multiclass_confusion_matrix_estimate(self, chunk_data: pd.DataFrame) -> return normalized_est_confusion_matrix def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: + """Returns a dictionary containing the performance metrics for a given chunk. + + Parameters + ---------- + chunk_data : pd.DataFrame + A pandas dataframe containing the data for a given chunk. + + Returns + ------- + chunk_record : Dict + A dictionary of perfomance metric, value pairs. + """ chunk_record = {} estimated_cm = self._get_multiclass_confusion_matrix_estimate(chunk_data) diff --git a/nannyml/performance_estimation/confidence_based/results.py b/nannyml/performance_estimation/confidence_based/results.py index 8de0ffd74..308b897d0 100644 --- a/nannyml/performance_estimation/confidence_based/results.py +++ b/nannyml/performance_estimation/confidence_based/results.py @@ -37,7 +37,8 @@ def __init__( problem_type: ProblemType, timestamp_column_name: Optional[str] = None, ): - """ + """Initialize CBPE results class. + Parameters ---------- results_data: pd.DataFrame @@ -124,9 +125,7 @@ def _get_metric_by_name(self, name: str) -> Optional[Metric]: return None def keys(self) -> List[Key]: - """ - Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')` - """ + """Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')`.""" return [ Key( properties=(component[1],), @@ -155,6 +154,8 @@ def plot( Parameters ---------- kind: str, default='performance' + What kind of plot to create. Only performance type is available. + Raises ------ From 2b4655a603faa51b7b9f49eaf7467490bd1285d9 Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Thu, 8 Feb 2024 21:24:00 +0200 Subject: [PATCH 04/10] dont allow confidence range to go above/below threshold limits --- nannyml/data_quality/missing/calculator.py | 17 +++++++++++++---- nannyml/data_quality/missing/result.py | 14 +++++++------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/nannyml/data_quality/missing/calculator.py b/nannyml/data_quality/missing/calculator.py index d9bf3513a..1ec278725 100644 --- a/nannyml/data_quality/missing/calculator.py +++ b/nannyml/data_quality/missing/calculator.py @@ -62,6 +62,9 @@ def __init__( Only one of `chunk_size`, `chunk_number` or `chunk_period` should be given. chunker : Chunker The `Chunker` used to split the data sets into a lists of chunks. 
+ threshold: Threshold, default=StandardDeviationThreshold + The threshold you wish to evaluate values on. Defaults to a StandardDeviationThreshold with default + options. The other available value is ConstantThreshold. Examples @@ -102,14 +105,14 @@ def __init__( self._lower_alert_thresholds: Dict[str, Optional[float]] = {column_name: 0 for column_name in self.column_names} self.lower_threshold_value_limit: float = 0 - self.upper_threshold_value_limit: float + self.upper_threshold_value_limit: Optional[float] = None self.normalize = normalize if self.normalize: self.data_quality_metric = 'missing_values_rate' self.upper_threshold_value_limit = 1 else: self.data_quality_metric = 'missing_values_count' - self.upper_threshold_value_limit = np.nan + # self.upper_threshold_value_limit = np.nan def _calculate_missing_value_stats(self, data: pd.Series): count_tot = data.shape[0] @@ -217,8 +220,14 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st else: result['sampling_error'] = serr * np.sqrt(tot) - result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error'] - result['lower_confidence_boundary'] = result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'] + result['upper_confidence_boundary'] = np.minimum( + result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error'], + self.upper_threshold_value_limit or np.inf + ) + result['lower_confidence_boundary'] = np.maximum( + result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'], + self.lower_threshold_value_limit or -np.inf + ) result['upper_threshold'] = self._upper_alert_thresholds[column_name] result['lower_threshold'] = self._lower_alert_thresholds[column_name] diff --git a/nannyml/data_quality/missing/result.py b/nannyml/data_quality/missing/result.py index 4bba51c6f..0f723e7cf 100644 --- a/nannyml/data_quality/missing/result.py +++ b/nannyml/data_quality/missing/result.py @@ -24,7 +24,10 @@ class Result(PerColumnResult, ResultCompareMixin): - """Contains the results of the univariate statistical drift calculation and provides plotting functionality.""" + """Missing Values Result Class. + + Contains calculation results and provides plotting functionality. + """ def __init__( self, @@ -34,13 +37,14 @@ def __init__( timestamp_column_name: Optional[str], chunker: Chunker, ): + """Initialize Missing Values Result Class.""" super().__init__(results_data, column_names) self.timestamp_column_name = timestamp_column_name self.data_quality_metric = data_quality_metric self.chunker = chunker - def keys(self) -> List[Key]: + def keys(self) -> List[Key]: # noqa: D102 return [ Key( properties=(column_name,), @@ -55,10 +59,7 @@ def plot( *args, **kwargs, ) -> go.Figure: - """ - - Parameters - ---------- + """Plot Missing Values results. Returns ------- @@ -82,7 +83,6 @@ def plot( ... 
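# Illustrative sketch of the standard-deviation threshold idea referenced in the docstring above
# (threshold: Threshold, default=StandardDeviationThreshold): alert thresholds are derived from the
# spread of the per-chunk reference values and then clipped to the metric's value limits. The
# multiplier of 3 and the helper name are assumptions for illustration, not NannyML code.
import numpy as np

def std_thresholds(reference_chunk_values, multiplier=3, lower_limit=None, upper_limit=None):
    values = np.asarray(reference_chunk_values)
    center, spread = values.mean(), values.std()
    lower = center - multiplier * spread
    upper = center + multiplier * spread
    if lower_limit is not None:
        lower = max(lower, lower_limit)
    if upper_limit is not None:
        upper = min(upper, upper_limit)
    return lower, upper

# per-chunk missing-value rates on reference data; the raw lower threshold would be negative
# and is clipped to the value limit of 0
print(std_thresholds([0.01, 0.02, 0.0, 0.01], lower_limit=0, upper_limit=1))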
res = res.filter(period='analysis', column_name=column_name).plot().show() """ - return plot_metrics( self, title='Data Quality ', From 0cf3ff45a6d0b4c8d55e0443f42ef2cd4b728824 Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Thu, 8 Feb 2024 22:00:40 +0200 Subject: [PATCH 05/10] fix check error on 0 --- nannyml/data_quality/missing/calculator.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/nannyml/data_quality/missing/calculator.py b/nannyml/data_quality/missing/calculator.py index 1ec278725..ad1e1b5a2 100644 --- a/nannyml/data_quality/missing/calculator.py +++ b/nannyml/data_quality/missing/calculator.py @@ -112,7 +112,6 @@ def __init__( self.upper_threshold_value_limit = 1 else: self.data_quality_metric = 'missing_values_count' - # self.upper_threshold_value_limit = np.nan def _calculate_missing_value_stats(self, data: pd.Series): count_tot = data.shape[0] @@ -222,11 +221,11 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st result['upper_confidence_boundary'] = np.minimum( result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error'], - self.upper_threshold_value_limit or np.inf + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit ) result['lower_confidence_boundary'] = np.maximum( result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'], - self.lower_threshold_value_limit or -np.inf + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit ) result['upper_threshold'] = self._upper_alert_thresholds[column_name] From b94c3f8f79ed1384588e8119c5e446e613ee7863 Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Thu, 8 Feb 2024 22:12:27 +0200 Subject: [PATCH 06/10] fix confidence range tests for BC CBPE when limit is 0 --- .../confidence_based/metrics.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/nannyml/performance_estimation/confidence_based/metrics.py b/nannyml/performance_estimation/confidence_based/metrics.py index f6663edcc..b9fa40ac9 100644 --- a/nannyml/performance_estimation/confidence_based/metrics.py +++ b/nannyml/performance_estimation/confidence_based/metrics.py @@ -281,12 +281,12 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: chunk_record[f'realized_{column_name}'] = self._realized_performance(chunk_data) chunk_record[f'upper_confidence_boundary_{column_name}'] = np.minimum( - self.upper_threshold_value_limit or np.inf, + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, estimated_metric_value + SAMPLING_ERROR_RANGE * metric_estimate_sampling_error, ) chunk_record[f'lower_confidence_boundary_{column_name}'] = np.maximum( - self.lower_threshold_value_limit or -np.inf, + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_metric_value - SAMPLING_ERROR_RANGE * metric_estimate_sampling_error, ) @@ -1314,12 +1314,12 @@ def get_true_pos_info(self, chunk_data: pd.DataFrame) -> Dict: true_pos_info['realized_true_positive'] = self._true_positive_realized_performance(chunk_data) true_pos_info['upper_confidence_boundary_true_positive'] = np.minimum( - self.upper_threshold_value_limit or np.inf, + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, estimated_true_positives + SAMPLING_ERROR_RANGE * sampling_error_true_positives, ) true_pos_info['lower_confidence_boundary_true_positive'] = np.maximum( - self.lower_threshold_value_limit or -np.inf, + 
-np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives ) @@ -1362,12 +1362,12 @@ def get_true_neg_info(self, chunk_data: pd.DataFrame) -> Dict: true_neg_info['realized_true_negative'] = self._true_negative_realized_performance(chunk_data) true_neg_info['upper_confidence_boundary_true_negative'] = np.minimum( - self.upper_threshold_value_limit or np.inf, + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, estimated_true_negatives + SAMPLING_ERROR_RANGE * sampling_error_true_negatives, ) true_neg_info['lower_confidence_boundary_true_negative'] = np.maximum( - self.lower_threshold_value_limit or -np.inf, + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives ) @@ -1410,12 +1410,12 @@ def get_false_pos_info(self, chunk_data: pd.DataFrame) -> Dict: false_pos_info['realized_false_positive'] = self._false_positive_realized_performance(chunk_data) false_pos_info['upper_confidence_boundary_false_positive'] = np.minimum( - self.upper_threshold_value_limit or np.inf, + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, estimated_false_positives + SAMPLING_ERROR_RANGE * sampling_error_false_positives, ) false_pos_info['lower_confidence_boundary_false_positive'] = np.maximum( - self.lower_threshold_value_limit or -np.inf, + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_false_positives - SAMPLING_ERROR_RANGE * sampling_error_false_positives, ) @@ -1458,12 +1458,12 @@ def get_false_neg_info(self, chunk_data: pd.DataFrame) -> Dict: false_neg_info['realized_false_negative'] = self._false_negative_realized_performance(chunk_data) false_neg_info['upper_confidence_boundary_false_negative'] = np.minimum( - self.upper_threshold_value_limit or np.inf, + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, estimated_false_negatives + SAMPLING_ERROR_RANGE * sampling_error_false_negatives, ) false_neg_info['lower_confidence_boundary_false_negative'] = np.maximum( - self.lower_threshold_value_limit or -np.inf, + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, estimated_false_negatives - SAMPLING_ERROR_RANGE * sampling_error_false_negatives, ) From 9966fd3d93e6c49e76919c0dbe3a8ecefea13123 Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Thu, 8 Feb 2024 22:12:27 +0200 Subject: [PATCH 07/10] fix confidence range tests for MC CBPE when limit is 0 --- .../confidence_based/metrics.py | 26 ++++++------------- 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/nannyml/performance_estimation/confidence_based/metrics.py b/nannyml/performance_estimation/confidence_based/metrics.py index b9fa40ac9..d81d73712 100644 --- a/nannyml/performance_estimation/confidence_based/metrics.py +++ b/nannyml/performance_estimation/confidence_based/metrics.py @@ -2337,30 +2337,20 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict: + SAMPLING_ERROR_RANGE * sampling_error[self.classes.index(true_class), self.classes.index(pred_class)] ) - - if self.normalize_confusion_matrix is None: - chunk_record[ - f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}' - ] = upper_confidence_boundary - else: - 
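# The reason these commits replace "limit or np.inf" with an explicit "is None" check: "or" falls
# back whenever the left operand is falsy, and a perfectly valid limit of 0 (the lower bound of a
# count-style metric) is falsy in Python, so it was silently ignored.
import numpy as np

limit = 0  # a legitimate lower value limit

print(limit or -np.inf)                     # -inf  -> the 0 limit is lost
print(-np.inf if limit is None else limit)  # 0     -> the 0 limit is respected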
chunk_record[f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}'] = min( - self.upper_threshold_value_limit, upper_confidence_boundary - ) + chunk_record[f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}'] = min( + np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit, + upper_confidence_boundary + ) lower_confidence_boundary = ( estimated_cm[self.classes.index(true_class), self.classes.index(pred_class)] - SAMPLING_ERROR_RANGE * sampling_error[self.classes.index(true_class), self.classes.index(pred_class)] ) - - if self.normalize_confusion_matrix is None: - chunk_record[ - f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}' - ] = lower_confidence_boundary - else: - chunk_record[f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}'] = max( - self.lower_threshold_value_limit, lower_confidence_boundary - ) + chunk_record[f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}'] = max( + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit, + lower_confidence_boundary + ) chunk_record[f'upper_threshold_true_{true_class}_pred_{pred_class}'] = self.alert_thresholds[ f'true_{true_class}_pred_{pred_class}' From 5246101c389503f24ea4f329f19816327b9e91cc Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Thu, 8 Feb 2024 23:18:02 +0200 Subject: [PATCH 08/10] fix lower limit 0 for stats std values thresholds and confidence --- nannyml/stats/std/calculator.py | 13 ++++++++----- nannyml/stats/std/result.py | 11 ++++------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/nannyml/stats/std/calculator.py b/nannyml/stats/std/calculator.py index 1f971fb87..4dbea7a80 100644 --- a/nannyml/stats/std/calculator.py +++ b/nannyml/stats/std/calculator.py @@ -2,7 +2,7 @@ # # License: Apache Software License 2.0 -"""Simple Statistics Average Calculator""" +"""Simple Statistics Standard Deviation Module.""" from typing import Any, Dict, List, Optional, Tuple, Union @@ -25,7 +25,7 @@ class SummaryStatsStdCalculator(AbstractCalculator): - """SummaryStatsStdCalculator implementation""" + """Simple Statistics Standard Deviation Calculator.""" def __init__( self, @@ -102,8 +102,8 @@ def __init__( self._upper_alert_thresholds: Dict[str, Optional[float]] = {column_name: 0 for column_name in self.column_names} self._lower_alert_thresholds: Dict[str, Optional[float]] = {column_name: 0 for column_name in self.column_names} - self.lower_threshold_value_limit: float = np.nan - self.upper_threshold_value_limit: float = np.nan + self.lower_threshold_value_limit: float = 0 + self.upper_threshold_value_limit: Optional[float] = None self.simple_stats_metric = 'values_std' @log_usage(UsageEvent.STATS_STD_FIT) @@ -203,7 +203,10 @@ def _calculate_for_column(self, data: pd.DataFrame, column_name: str) -> Dict[st self._sampling_error_components[column_name], data[column_name] ) result['upper_confidence_boundary'] = result['value'] + SAMPLING_ERROR_RANGE * result['sampling_error'] - result['lower_confidence_boundary'] = result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'] + result['lower_confidence_boundary'] = np.maximum( + result['value'] - SAMPLING_ERROR_RANGE * result['sampling_error'], + -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit + ) result['upper_threshold'] = self._upper_alert_thresholds[column_name] result['lower_threshold'] = self._lower_alert_thresholds[column_name] diff --git a/nannyml/stats/std/result.py 
b/nannyml/stats/std/result.py index 984abce94..ccd0ffe28 100644 --- a/nannyml/stats/std/result.py +++ b/nannyml/stats/std/result.py @@ -26,7 +26,7 @@ class Result(PerColumnResult, ResultCompareMixin): - """Contains the results of the univariate statistical drift calculation and provides plotting functionality.""" + """Summary Stats Standard Deviation Calculator Results object.""" def __init__( self, @@ -36,13 +36,14 @@ def __init__( timestamp_column_name: Optional[str], chunker: Chunker, ): + """Initalize Summary Stats Standard Deviation Calculator Results object.""" super().__init__(results_data, column_names) self.timestamp_column_name = timestamp_column_name self.simple_stats_metric = simple_stats_metric self.chunker = chunker - def keys(self) -> List[Key]: + def keys(self) -> List[Key]: # noqa: D102 return [ Key( properties=(column_name,), @@ -57,10 +58,7 @@ def plot( *args, **kwargs, ) -> go.Figure: - """ - - Parameters - ---------- + """Plot Summary Stats Standard Deviation Calculator Results. Returns ------- @@ -84,7 +82,6 @@ def plot( ... res = res.filter(period='analysis', column_name=column_name).plot().show() """ - return plot_metrics( self, title='Values Standard Deviation', From b479b68cf6b6c39d8a4481603e1eebfb47632e0e Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Fri, 9 Feb 2024 18:01:37 +0200 Subject: [PATCH 09/10] fix binary classification CM elements threshold logic --- nannyml/performance_calculation/calculator.py | 4 ++-- .../metrics/binary_classification.py | 10 ++++------ nannyml/performance_calculation/result.py | 5 ++--- 3 files changed, 8 insertions(+), 11 deletions(-) diff --git a/nannyml/performance_calculation/calculator.py b/nannyml/performance_calculation/calculator.py index 294e6d8bb..26b9c483b 100644 --- a/nannyml/performance_calculation/calculator.py +++ b/nannyml/performance_calculation/calculator.py @@ -17,7 +17,6 @@ Examples -------- - >>> import nannyml as nml >>> from IPython.display import display >>> reference_df, analysis_df, analysis_targets_df = nml.load_synthetic_car_loan_dataset() @@ -113,6 +112,7 @@ def __init__( - 'regression' - 'classification_binary' - 'classification_multiclass' + y_pred_proba: ModelOutputsType, default=None Name(s) of the column(s) containing your model output. Pass a single string when there is only a single model output column, e.g. in binary classification cases. 
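# Illustrative sketch of what the SummaryStatsStdCalculator touched in the previous commit tracks:
# the standard deviation of a column, computed per chunk. Because a standard deviation can never be
# negative, the calculator's lower value limit is 0 and its lower confidence boundary is clipped
# there. The data and chunking below are made up for illustration.
import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
df = pd.DataFrame({'feature': rng.normal(loc=10, scale=2, size=1_000)})

chunk_size = 250
per_chunk_std = df.groupby(np.arange(len(df)) // chunk_size)['feature'].std()
print(per_chunk_std)  # one std value per chunk; each is >= 0 by construction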
@@ -254,7 +254,7 @@ def __init__( self.result: Optional[Result] = None - def __str__(self): + def __str__(self): # noqa: D105 return f"PerformanceCalculator[metrics={str(self.metrics)}]" @log_usage(UsageEvent.PERFORMANCE_CALC_FIT, metadata_from_self=['metrics', 'problem_type']) diff --git a/nannyml/performance_calculation/metrics/binary_classification.py b/nannyml/performance_calculation/metrics/binary_classification.py index d302251d0..a3e6d1eb7 100644 --- a/nannyml/performance_calculation/metrics/binary_classification.py +++ b/nannyml/performance_calculation/metrics/binary_classification.py @@ -600,13 +600,11 @@ def __init__( ('False Positive', 'false_positive'), ('False Negative', 'false_negative'), ], + lower_threshold_limit=0 ) - self.lower_threshold_limit: Optional[float] = 0.0 if normalize_confusion_matrix else None - self.upper_threshold_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None - + self.upper_threshold_value_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix - # sampling error self._sampling_error_components: Tuple = () @@ -683,8 +681,8 @@ def _calculate_confusion_matrix_alert_thresholds( lower_threshold_value, upper_threshold_value = calculate_threshold_values( threshold=self.threshold, data=np.asarray(chunked_reference_metric), - lower_threshold_value_limit=self.lower_threshold_limit, - upper_threshold_value_limit=self.upper_threshold_limit, + lower_threshold_value_limit=self.lower_threshold_value_limit, + upper_threshold_value_limit=self.upper_threshold_value_limit, logger=self._logger, metric_name=self.display_name, ) diff --git a/nannyml/performance_calculation/result.py b/nannyml/performance_calculation/result.py index 62aa17278..9475dfe01 100644 --- a/nannyml/performance_calculation/result.py +++ b/nannyml/performance_calculation/result.py @@ -84,9 +84,7 @@ def __init__( self.analysis_data = analysis_data def keys(self) -> List[Key]: - """ - Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')` - """ + """Creates a list of keys where each Key is a `namedtuple('Key', 'properties display_names')`.""" return [ Key( properties=(component[1],), @@ -108,6 +106,7 @@ def plot( **kwargs, ) -> go.Figure: """Render realized performance metrics. + This function will return a :class:`plotly.graph_objects.Figure` object. Parameters From 069496a9a81cf4ebc62e8c4450b1f422e93d43b9 Mon Sep 17 00:00:00 2001 From: Nikolaos Perrakis Date: Fri, 9 Feb 2024 18:13:24 +0200 Subject: [PATCH 10/10] fix realized perf MC CM threshold logic --- .../metrics/multiclass_classification.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nannyml/performance_calculation/metrics/multiclass_classification.py b/nannyml/performance_calculation/metrics/multiclass_classification.py index 0e0ec1d65..6179477ca 100644 --- a/nannyml/performance_calculation/metrics/multiclass_classification.py +++ b/nannyml/performance_calculation/metrics/multiclass_classification.py @@ -596,9 +596,11 @@ def __init__( threshold=threshold, y_pred_proba=y_pred_proba, components=[("None", "none")], + lower_threshold_limit=0 ) self.normalize_confusion_matrix: Optional[str] = normalize_confusion_matrix + self.upper_threshold_value_limit: Optional[float] = 1.0 if normalize_confusion_matrix else None self.classes: Optional[List[str]] = None
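# Closing sketch of the alert logic that the threshold and value-limit changes in this series feed
# into: a chunk is flagged when its metric value falls outside the [lower_threshold,
# upper_threshold] band, with None meaning "no bound on that side". Simplified illustration, not
# the exact NannyML implementation.
from typing import Optional


def alert(value: float, lower_threshold: Optional[float], upper_threshold: Optional[float]) -> bool:
    too_low = lower_threshold is not None and value < lower_threshold
    too_high = upper_threshold is not None and value > upper_threshold
    return too_low or too_high


print(alert(0.97, lower_threshold=0.93, upper_threshold=None))  # False: within the band
print(alert(0.90, lower_threshold=0.93, upper_threshold=None))  # True: below the lower threshold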