From aad7edf7e59b3edd2f184f20cf5f8b39ae5f0a0c Mon Sep 17 00:00:00 2001
From: Michael Van de Steene <124588413+michael-nml@users.noreply.github.com>
Date: Wed, 17 Jan 2024 01:46:23 +0100
Subject: [PATCH] Remove chi2 thresholds for analysis & reference (#349)

Chi2 thresholding is based on p-values, which means the calculated
thresholds are irrelevant. Previously, thresholds were removed as part
of alerting. Because alerting is only applied to analysis data, the
threshold values were still available on reference data, which led to
confusion.

This commit changes that so the chi2 threshold values are removed across
the entire dataset.
---
 nannyml/drift/univariate/methods.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/nannyml/drift/univariate/methods.py b/nannyml/drift/univariate/methods.py
index 156ce0cf..b5710318 100644
--- a/nannyml/drift/univariate/methods.py
+++ b/nannyml/drift/univariate/methods.py
@@ -443,6 +443,14 @@ def __init__(self, **kwargs) -> None:
         self._p_value: float
         self._fitted = False
 
+    def fit(self, reference_data: pd.Series, timestamps: Optional[pd.Series] = None) -> Self:
+        super().fit(reference_data, timestamps)
+
+        # Thresholding is based on p-values, so ignore all custom thresholds and disable plotting a threshold.
+        self.lower_threshold_value = None
+        self.upper_threshold_value = None
+        return self
+
     def _fit(self, reference_data: pd.Series, timestamps: Optional[pd.Series] = None) -> Self:
         reference_data = _remove_nans(reference_data)
         self._reference_data_vcs = reference_data.value_counts().loc[lambda v: v != 0]
@@ -462,9 +470,6 @@ def _calculate(self, data: pd.Series):
         return stat
 
     def alert(self, value: float):
-        self.lower_threshold_value = None  # ignoring all custom thresholding, disable plotting a threshold
-        self.upper_threshold_value = None  # ignoring all custom thresholding, disable plotting a threshold
-
         return self._p_value < 0.05
 
     def _calc_chi2(self, data: pd.Series):