From 7ae60a11a87c6c87d1b66127ba0334d5c71f944b Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Mon, 1 Dec 2025 19:46:44 +0100
Subject: [PATCH 01/18] Generate 2 series in "setUp()" method

---
 ats/tests/test_evaluators.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index d8c9d86..61e5c60 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -26,6 +26,27 @@ def setUp(self):
         rnd.seed(123)
         np.random.seed(123)
+        series1 = generate_timeseries_df(entries=5, variables=2)
+        series1['anomaly_label'] = [None, 'anomaly_2', 'anomaly_1', None, 'anomaly_1']
+        # series1
+        # timestamp                   value_1    value_2   anomaly_label
+        # 2025-06-10 14:00:00+00:00   0.000000   0.707107  None
+        # 2025-06-10 15:00:00+00:00   0.841471   0.977061  anomaly_2
+        # 2025-06-10 16:00:00+00:00   0.909297   0.348710  anomaly_1
+        # 2025-06-10 17:00:00+00:00   0.141120  -0.600243  None
+        # 2025-06-10 18:00:00+00:00  -0.756802  -0.997336  anomaly_1
+        series2 = generate_timeseries_df(entries=7, variables=2)
+        series2['anomaly_label'] = ['anomaly_1', 'anomaly_2', 'anomaly_1', None, 'anomaly_1', None, None]
+        # series2
+        # timestamp                   value_1    value_2   anomaly_label
+        # 2025-06-10 14:00:00+00:00   0.000000   0.707107  anomaly_1
+        # 2025-06-10 15:00:00+00:00   0.841471   0.977061  anomaly_2
+        # 2025-06-10 16:00:00+00:00   0.909297   0.348710  anomaly_1
+        # 2025-06-10 17:00:00+00:00   0.141120  -0.600243  None
+        # 2025-06-10 18:00:00+00:00  -0.756802  -0.997336  anomaly_1
+        # 2025-06-10 19:00:00+00:00  -0.958924  -0.477482  None
+        # 2025-06-10 20:00:00+00:00  -0.279415   0.481366  None
+
     def test_evaluate_anomaly_detector(self):
         min_max_anomaly_detector = MinMaxAnomalyDetector()

From 4fca96d37fc5cdf7e6dfcf29c9be1d699aaf53c1 Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Mon, 1 Dec 2025 19:50:30 +0100
Subject: [PATCH 02/18] Change variable names inside test functions

The names "series1" and "series2" have been changed to "series_1" and
"series_2" inside test functions, to distinguish them from the series
generated in the "setUp()" method
---
 ats/tests/test_evaluators.py | 56 ++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 61e5c60..7fe362d 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -324,9 +324,9 @@ def test_evaluate_point_granularity(self):
         effects = []
         # series with 2880 data points
         series_generator = HumiTempTimeseriesGenerator()
-        series1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects)
-        series2 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects)
-        dataset = [series1,series2]
+        series_1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects)
+        series_2 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects)
+        dataset = [series_1,series_2]
         evaluator = Evaluator(test_data=dataset)
         minmax1 = MinMaxAnomalyDetector()
         minmax2 = MinMaxAnomalyDetector()
@@ -353,9 +353,9 @@ def test_evaluate_variable_granularity(self):
         effects = []
         # series with 2880 data points
         series_generator = HumiTempTimeseriesGenerator()
-        series1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects)
-        series2 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects)
-        dataset = [series1,series2]
+        series_1 = 
series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) + series_2 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) + dataset = [series_1,series_2] evaluator = Evaluator(test_data=dataset) minmax1 = MinMaxAnomalyDetector() minmax2 = MinMaxAnomalyDetector() @@ -382,11 +382,11 @@ def test_evaluate_series_granularity(self): anomalies = ['step_uv'] effects = [] series_generator = HumiTempTimeseriesGenerator() - # series1 will be a true anomaly for the minmax - series1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) - # series2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) - series2 = series_generator.generate(include_effect_label=True, anomalies=[],effects=effects) - dataset = [series1,series2] + # series_1 will be a true anomaly for the minmax + series_1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) + # series_2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) + series_2 = series_generator.generate(include_effect_label=True, anomalies=[],effects=effects) + dataset = [series_1,series_2] evaluator = Evaluator(test_data=dataset) minmax1 = MinMaxAnomalyDetector() minmax2 = MinMaxAnomalyDetector() @@ -412,11 +412,11 @@ def test_evaluate_series_granularity(self): def test_series_granularity_eval_with_non_detected_anomalies(self): effects = [] series_generator = HumiTempTimeseriesGenerator() - # series1 will be a true anomaly for the minmax - series1 = series_generator.generate(include_effect_label=True, anomalies=['step_uv'],effects=effects) - # series2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) - series2 = series_generator.generate(include_effect_label=True, anomalies=['pattern_uv'],effects=effects) - dataset = [series1,series2] + # series_1 will be a true anomaly for the minmax + series_1 = series_generator.generate(include_effect_label=True, anomalies=['step_uv'],effects=effects) + # series_2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) + series_2 = series_generator.generate(include_effect_label=True, anomalies=['pattern_uv'],effects=effects) + dataset = [series_1,series_2] evaluator = Evaluator(test_data=dataset) minmax1 = MinMaxAnomalyDetector() minmax2 = MinMaxAnomalyDetector() @@ -453,9 +453,9 @@ def test_raised_error_evaluation_series_granularity(self): def test_copy_dataset(self): series_generator = HumiTempTimeseriesGenerator() - series1 = series_generator.generate(include_effect_label=True, effects=['noise']) - series2 = series_generator.generate(include_effect_label=True, effects=['noise']) - dataset = [series1,series2] + series_1 = series_generator.generate(include_effect_label=True, effects=['noise']) + series_2 = series_generator.generate(include_effect_label=True, effects=['noise']) + dataset = [series_1,series_2] evaluator = Evaluator(test_data=dataset) minmax1 = MinMaxAnomalyDetector() minmax2 = MinMaxAnomalyDetector() @@ -510,9 +510,9 @@ def test_series_granularity_evaluation(self): self.assertEqual(len(evaluation_result),1) self.assertAlmostEqual(evaluation_result['step_uv'],1) - series1 = series_generator.generate(include_effect_label=True, anomalies=[]) + series_1 = series_generator.generate(include_effect_label=True, anomalies=[]) minmax1 = MinMaxAnomalyDetector() - formatted_series1,anomaly_labels1 = 
_format_for_anomaly_detector(series1,synthetic=True) + formatted_series1,anomaly_labels1 = _format_for_anomaly_detector(series_1,synthetic=True) flagged_series1 = minmax.apply(formatted_series1) evaluation_result1 = _series_granularity_evaluation(flagged_series1,anomaly_labels1) self.assertEqual(len(evaluation_result1),1) @@ -522,8 +522,8 @@ def test_series_granularity_evaluation(self): # } try: - series2 = series_generator.generate(include_effect_label=True, anomalies=['spike_uv','step_uv']) - formatted_series2,anomaly_labels2 = _format_for_anomaly_detector(series2,synthetic=True) + series_2 = series_generator.generate(include_effect_label=True, anomalies=['spike_uv','step_uv']) + formatted_series2,anomaly_labels2 = _format_for_anomaly_detector(series_2,synthetic=True) flagged_series2 = minmax.apply(formatted_series2) evaluation_result2 = _series_granularity_evaluation(flagged_series2,anomaly_labels2) except Exception as e: @@ -533,11 +533,11 @@ def test_double_evaluator(self): anomalies = ['step_uv'] effects = [] series_generator = HumiTempTimeseriesGenerator() - # series1 will be a true anomaly for the minmax - series1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) - # series2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) - series2 = series_generator.generate(include_effect_label=True, anomalies=[],effects=effects) - dataset = [series1,series2] + # series_1 will be a true anomaly for the minmax + series_1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) + # series_2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) + series_2 = series_generator.generate(include_effect_label=True, anomalies=[],effects=effects) + dataset = [series_1,series_2] evaluator = Evaluator(test_data=dataset) minmax1 = MinMaxAnomalyDetector() minmax2 = MinMaxAnomalyDetector() From 3fcdfd38c2ba3d3d2a819c986a2ea45c036f307e Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Mon, 1 Dec 2025 20:21:37 +0100 Subject: [PATCH 03/18] Amend an error in the "setUp()" method --- ats/tests/test_evaluators.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 7fe362d..9c67be1 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -26,8 +26,8 @@ def setUp(self): rnd.seed(123) np.random.seed(123) - series1 = generate_timeseries_df(entries=5, variables=2) - series1['anomaly_label'] = [None, 'anomaly_2', 'anomaly_1', None, 'anomaly_1'] + self.series1 = generate_timeseries_df(entries=5, variables=2) + self.series1['anomaly_label'] = [None, 'anomaly_2', 'anomaly_1', None, 'anomaly_1'] # series1 # timestamp value_1 value_2 anomaly_label # 2025-06-10 14:00:00+00:00 0.000000 0.707107 None @@ -35,8 +35,8 @@ def setUp(self): # 2025-06-10 16:00:00+00:00 0.909297 0.348710 anomaly_1 # 2025-06-10 17:00:00+00:00 0.141120 -0.600243 None # 2025-06-10 18:00:00+00:00 -0.756802 -0.997336 anomaly_1 - series2 = generate_timeseries_df(entries=7, variables=2) - series2['anomaly_label'] = ['anomaly_1', 'anomaly_2', 'anomaly_1', None, 'anomaly_1', None, None] + self.series2 = generate_timeseries_df(entries=7, variables=2) + self.series2['anomaly_label'] = ['anomaly_1', 'anomaly_2', 'anomaly_1', None, 'anomaly_1', None, None] # series2 # timestamp value_1 value_2 anomaly_label # 2025-06-10 14:00:00+00:00 0.000000 0.707107 anomaly_1 From 
e711f710ae45db9cc86df702d7d73e131109a1f4 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Mon, 1 Dec 2025 20:47:09 +0100 Subject: [PATCH 04/18] Fix the evaluation strategy in "_variable_granularity_evaluation()" --- ats/evaluators.py | 17 ++++++++++++++--- ats/tests/test_evaluators.py | 37 +++++++++++++++++++++++------------- 2 files changed, 38 insertions(+), 16 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index c85c98a..65dd717 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -140,15 +140,26 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): raise ValueError('Variable granularity is not for this model') normalization_factor = variables_n * len(flagged_timeseries_df) + total_inserted_anomalies_n = 0 + total_detected_anomalies_n = 0 + detection_counts_by_anomaly_type = {} for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items(): + if anomaly is not None: + total_inserted_anomalies_n += frequency anomaly_count = 0 for timestamp in flagged_timeseries_df.index: if anomaly_labels_df[timestamp] == anomaly: for column in flagged_timeseries_df.filter(like='anomaly').columns: anomaly_count += flagged_timeseries_df.loc[timestamp,column] - one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor - - one_series_evaluation_result['false_positives'] = one_series_evaluation_result.pop(None) + if anomaly is not None: + total_detected_anomalies_n += anomaly_count + detection_counts_by_anomaly_type[anomaly] = anomaly_count + + total_inserted_anomalies_n *= variables_n + one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None) + one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor + one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n + one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n return one_series_evaluation_result def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 9c67be1..2109824 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -468,19 +468,30 @@ def test_copy_dataset(self): self.assertEqual(len(dataset_copies[1]),2) def test_variable_granularity_evaluation(self): - series_generator = HumiTempTimeseriesGenerator() - series = series_generator.generate(include_effect_label=True, anomalies=['step_uv']) - minmax = MinMaxAnomalyDetector() - formatted_series,anomaly_labels = _format_for_anomaly_detector(series,synthetic=True) - flagged_series = minmax.apply(formatted_series) - evaluation_result = _variable_granularity_evaluation(flagged_series,anomaly_labels) - # evaluation_result: - # { 'step_uv': 0.00034722222222222224 - # 'false_positives': 0.00034722222222222224 - # } - self.assertEqual(len(evaluation_result),2) - self.assertAlmostEqual(evaluation_result['step_uv'],1/len(series)) - self.assertAlmostEqual(evaluation_result['false_positives'],1/len(series)) + dataset = [self.series1] + evaluator = Evaluator(test_data=dataset) + minmax1 = MinMaxAnomalyDetector() + models={'detector_1': minmax1} + evaluator = Evaluator(test_data=dataset) + evaluation_results = evaluator.evaluate(models=models,granularity='variable') + self.assertIn('detector_1',evaluation_results.keys()) + self.assertIn('anomalies_count',evaluation_results['detector_1'].keys()) + 
self.assertIn('anomalies_ratio',evaluation_results['detector_1'].keys()) + self.assertIn('false_positives_count',evaluation_results['detector_1'].keys()) + self.assertIn('false_positives_ratio',evaluation_results['detector_1'].keys()) + + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],4) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],4/6) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],0) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],0) + + dataset1 = [self.series2] + evaluator1 = Evaluator(test_data=dataset1) + evaluation_results = evaluator1.evaluate(models=models,granularity='variable') + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],3) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],3/8) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/(7*2)) def test_point_granularity_evaluation(self): series_generator = HumiTempTimeseriesGenerator() From e4bf4248f578ae2566ddb647121e428c89faafed Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Mon, 1 Dec 2025 21:02:26 +0100 Subject: [PATCH 05/18] Fix the evaluation strategy in "_point_granularity_evaluation()" --- ats/evaluators.py | 13 ++++++++++++- ats/tests/test_evaluators.py | 37 +++++++++++++++++++++++------------- 2 files changed, 36 insertions(+), 14 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index 65dd717..832f2ed 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -166,7 +166,12 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): one_series_evaluation_result = {} normalization_factor = len(flagged_timeseries_df) + total_inserted_anomalies_n = 0 + total_detected_anomalies_n = 0 + detection_counts_by_anomaly_type = {} for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items(): + if anomaly is not None: + total_inserted_anomalies_n += frequency anomaly_count = 0 for timestamp in flagged_timeseries_df.index: if anomaly_labels_df[timestamp] == anomaly: @@ -174,9 +179,15 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): if flagged_timeseries_df.loc[timestamp,column]: anomaly_count += 1 break + if anomaly is not None: + total_detected_anomalies_n += anomaly_count + detection_counts_by_anomaly_type[anomaly] = anomaly_count one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor - one_series_evaluation_result['false_positives'] = one_series_evaluation_result.pop(None) + one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None) + one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor + one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n + one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n return one_series_evaluation_result def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 2109824..bbc7903 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -494,19 +494,30 @@ def test_variable_granularity_evaluation(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/(7*2)) def 
test_point_granularity_evaluation(self): - series_generator = HumiTempTimeseriesGenerator() - series = series_generator.generate(include_effect_label=True, anomalies=['step_uv']) - minmax = MinMaxAnomalyDetector() - formatted_series,anomaly_labels = _format_for_anomaly_detector(series,synthetic=True) - flagged_series = minmax.apply(formatted_series) - evaluation_result = _point_granularity_evaluation(flagged_series,anomaly_labels) - # evaluation_result: - # { 'step_uv': 0.0006944444444444445 - # 'false_positives': 0.0006944444444444445 - # } - self.assertEqual(len(evaluation_result),2) - self.assertAlmostEqual(evaluation_result['step_uv'],2/len(series)) - self.assertAlmostEqual(evaluation_result['false_positives'],2/len(series)) + dataset = [self.series1] + evaluator = Evaluator(test_data=dataset) + minmax1 = MinMaxAnomalyDetector() + models={'detector_1': minmax1} + evaluator = Evaluator(test_data=dataset) + evaluation_results = evaluator.evaluate(models=models,granularity='data_point') + self.assertIn('detector_1',evaluation_results.keys()) + self.assertIn('anomalies_count',evaluation_results['detector_1'].keys()) + self.assertIn('anomalies_ratio',evaluation_results['detector_1'].keys()) + self.assertIn('false_positives_count',evaluation_results['detector_1'].keys()) + self.assertIn('false_positives_ratio',evaluation_results['detector_1'].keys()) + + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],3) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],3/3) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],0) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],0) + + dataset1 = [self.series2] + evaluator1 = Evaluator(test_data=dataset1) + evaluation_results = evaluator1.evaluate(models=models,granularity='data_point') + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],3) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],3/4) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/7) def test_series_granularity_evaluation(self): series_generator = HumiTempTimeseriesGenerator() From 3b3f5ef39e060a15f371096d669a35c5a46c8573 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Mon, 1 Dec 2025 21:06:08 +0100 Subject: [PATCH 06/18] Add series generation in "setUp()" method --- ats/tests/test_evaluators.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index bbc7903..afdf15b 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -46,6 +46,8 @@ def setUp(self): # 2025-06-10 18:00:00+00:00 -0.756802 -0.997336 anomaly_1 # 2025-06-10 19:00:00+00:00 -0.958924 -0.477482 None # 2025-06-10 20:00:00+00:00 -0.279415 0.481366 None + self.series3 = generate_timeseries_df(entries=3, variables=2) + self.series3['anomaly_label'] = [None, None, None] def test_evaluate_anomaly_detector(self): From b95d0c7e82d0af12ab58b6d84e5d5533c325cab3 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Mon, 1 Dec 2025 22:03:27 +0100 Subject: [PATCH 07/18] Fix the evaluation strategy in "_series_granularity_evaluation()" and "_calculate_model_scores()" --- ats/evaluators.py | 45 ++++++------- ats/tests/test_evaluators.py | 126 ++++++++++++----------------------- 2 files changed, 62 insertions(+), 109 deletions(-) diff --git a/ats/evaluators.py 
b/ats/evaluators.py index 832f2ed..c6eaf07 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -46,29 +46,29 @@ def evaluate_anomaly_detector(evaluated_timeseries_df, anomaly_labels, details=F return evaluation_results -def _calculate_model_scores(single_model_evaluation={},granularity='data_point'): +def _calculate_model_scores(single_model_evaluation={}): dataset_anomalies = set() for sample in single_model_evaluation.keys(): sample_anomalies = set(single_model_evaluation[sample].keys()) dataset_anomalies.update(sample_anomalies) - anomaly_scores = {} - for anomaly in dataset_anomalies: - anomaly_scores[anomaly] = 0 - if 'false_positives' not in dataset_anomalies: - anomaly_scores['false_positives'] = 0.0 - - for anomaly in dataset_anomalies: - for sample in single_model_evaluation.keys(): - if anomaly in single_model_evaluation[sample].keys(): - anomaly_scores[anomaly] += single_model_evaluation[sample][anomaly] + model_scores = {} + anomalies_count = 0 + false_positives_count = 0 + anomalies_ratio = 0 + false_positives_ratio = 0 + for sample in single_model_evaluation.keys(): + anomalies_count += single_model_evaluation[sample]['anomalies_count'] + anomalies_ratio += single_model_evaluation[sample]['anomalies_ratio'] + false_positives_count += single_model_evaluation[sample]['false_positives_count'] + false_positives_ratio += single_model_evaluation[sample]['false_positives_ratio'] - if granularity == 'series': - samples_n = len(single_model_evaluation) - for key in anomaly_scores.keys(): - anomaly_scores[key] /= samples_n + model_scores['anomalies_count'] = anomalies_count + model_scores['anomalies_ratio'] = anomalies_ratio/len(single_model_evaluation) + model_scores['false_positives_count'] = false_positives_count + model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation) - return anomaly_scores + return model_scores class Evaluator(): @@ -195,9 +195,6 @@ def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items(): if anomaly is not None: anomalies.append(anomaly) - anomalies_n = len(anomalies) - if anomalies_n > 1: - raise ValueError('Evaluation with series granularity supports series with only one anomaly') one_series_evaluation_result = {} is_series_anomalous = 0 @@ -206,11 +203,9 @@ def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): if flagged_timeseries_df.loc[timestamp,column]: is_series_anomalous = 1 break - if is_series_anomalous and not anomalies: - one_series_evaluation_result['false_positives'] = 1 - elif is_series_anomalous and anomalies: - one_series_evaluation_result[anomalies[0]] = 1 - elif not is_series_anomalous and anomalies: - one_series_evaluation_result[anomalies[0]] = 0 + one_series_evaluation_result['false_positives_count'] = 1 if is_series_anomalous and not anomalies else 0 + one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count'] + one_series_evaluation_result['anomalies_count'] = 1 if is_series_anomalous and anomalies else 0 + one_series_evaluation_result['anomalies_ratio'] = one_series_evaluation_result['anomalies_count'] return one_series_evaluation_result \ No newline at end of file diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index afdf15b..49579fd 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -263,63 +263,27 @@ def test_get_model_output(self): 
self.assertIn('humidity_anomaly',list(flagged_dataset[1].columns)) def test_calculate_model_scores(self): - single_model_evaluation = { - 'sample_1': { - 'anomaly_1': 0.5, - 'anomaly_2': 0.2, - 'false_positives': 0.6 - }, - 'sample_2': { - 'anomaly_1': 0.3, - 'anomaly_2': 0.4, - 'anomaly_3': 0.1, - 'false_positives': 0.2 - }, - } - model_scores = _calculate_model_scores(single_model_evaluation,granularity='data_point') - # model_scores: - # { 'anomaly_3': 0.1, - # 'anomaly_1': 0.8, - # 'false_positives': 0.8, - # 'anomaly_2': 0.6 - # } - self.assertEqual(len(model_scores),4) - self.assertIsInstance(model_scores,dict) - self.assertIn('anomaly_1',model_scores.keys()) - self.assertIn('anomaly_2',model_scores.keys()) - self.assertIn('anomaly_3',model_scores.keys()) - self.assertIn('false_positives',model_scores.keys()) - self.assertAlmostEqual(model_scores['anomaly_1'],0.8) - self.assertAlmostEqual(model_scores['anomaly_2'],0.6) - self.assertAlmostEqual(model_scores['anomaly_3'],0.1) - self.assertAlmostEqual(model_scores['false_positives'],0.8) - - def test_calculate_model_score_series_granularity(self): - single_model_evaluation = { - 'sample_1': { - 'anomaly_1': 1, - }, - 'sample_2': { - 'false_positives': 1 - }, - 'sample_3': { - 'anomaly_2': 1 - } - } - model_scores = _calculate_model_scores(single_model_evaluation,granularity='series') - # model_scores: - # { 'anomaly_1': 0.3333333333333333, - # 'false_positives': 0.3333333333333333, - # 'anomaly_2': 0.3333333333333333 - # } - self.assertEqual(len(model_scores),3) + single_model_evaluation = { 'sample_1': {'anomalies_count': 3, 'anomalies_ratio': 1.5, + 'false_positives_count': 1, + 'false_positives_ratio': 0.14}, + 'sample_2': {'anomalies_count': 3, 'anomalies_ratio': 1.5, + 'false_positives_count': 1, + 'false_positives_ratio': 0.14}, + 'sample_3': {'anomalies_count': 3, 'anomalies_ratio': 1.5, + 'false_positives_count': 1, + 'false_positives_ratio': 0.14} + } + model_scores = _calculate_model_scores(single_model_evaluation) self.assertIsInstance(model_scores,dict) - self.assertIn('anomaly_1',model_scores.keys()) - self.assertIn('anomaly_2',model_scores.keys()) - self.assertIn('false_positives',model_scores.keys()) - self.assertAlmostEqual(model_scores['anomaly_1'],0.3333333333333333) - self.assertAlmostEqual(model_scores['anomaly_2'],0.3333333333333333) - self.assertAlmostEqual(model_scores['false_positives'],0.333333333333333) + self.assertIn('anomalies_count',model_scores.keys()) + self.assertIn('anomalies_ratio',model_scores.keys()) + self.assertIn('false_positives_count',model_scores.keys()) + self.assertIn('false_positives_ratio',model_scores.keys()) + + self.assertAlmostEqual(model_scores['anomalies_count'],9) + self.assertAlmostEqual(model_scores['anomalies_ratio'],1.5) + self.assertAlmostEqual(model_scores['false_positives_count'],3) + self.assertAlmostEqual(model_scores['false_positives_ratio'],0.14) def test_evaluate_point_granularity(self): anomalies = ['step_uv'] @@ -522,36 +486,30 @@ def test_point_granularity_evaluation(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/7) def test_series_granularity_evaluation(self): - series_generator = HumiTempTimeseriesGenerator() - series = series_generator.generate(include_effect_label=True, anomalies=['step_uv']) - minmax = MinMaxAnomalyDetector() - formatted_series,anomaly_labels = _format_for_anomaly_detector(series,synthetic=True) - flagged_series = minmax.apply(formatted_series) - evaluation_result = 
_series_granularity_evaluation(flagged_series,anomaly_labels) - # evaluation_result: - # { 'step_uv': 1 - # } - self.assertEqual(len(evaluation_result),1) - self.assertAlmostEqual(evaluation_result['step_uv'],1) - - series_1 = series_generator.generate(include_effect_label=True, anomalies=[]) + dataset = [self.series1] + evaluator = Evaluator(test_data=dataset) minmax1 = MinMaxAnomalyDetector() - formatted_series1,anomaly_labels1 = _format_for_anomaly_detector(series_1,synthetic=True) - flagged_series1 = minmax.apply(formatted_series1) - evaluation_result1 = _series_granularity_evaluation(flagged_series1,anomaly_labels1) - self.assertEqual(len(evaluation_result1),1) - self.assertAlmostEqual(evaluation_result1['false_positives'],1) - # evaluation_result1: - # { 'false_positives': 1 - # } + models={'detector_1': minmax1} + evaluator = Evaluator(test_data=dataset) + evaluation_results = evaluator.evaluate(models=models,granularity='series') + self.assertIn('detector_1',evaluation_results.keys()) + self.assertIn('anomalies_count',evaluation_results['detector_1'].keys()) + self.assertIn('anomalies_ratio',evaluation_results['detector_1'].keys()) + self.assertIn('false_positives_count',evaluation_results['detector_1'].keys()) + self.assertIn('false_positives_ratio',evaluation_results['detector_1'].keys()) - try: - series_2 = series_generator.generate(include_effect_label=True, anomalies=['spike_uv','step_uv']) - formatted_series2,anomaly_labels2 = _format_for_anomaly_detector(series_2,synthetic=True) - flagged_series2 = minmax.apply(formatted_series2) - evaluation_result2 = _series_granularity_evaluation(flagged_series2,anomaly_labels2) - except Exception as e: - self.assertIsInstance(e,ValueError) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],0) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],0) + + dataset1 = [self.series3] + evaluator1 = Evaluator(test_data=dataset1) + evaluation_results = evaluator1.evaluate(models=models,granularity='series') + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],0) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],0) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1) def test_double_evaluator(self): anomalies = ['step_uv'] From f74a36f53d02a0466f8844dd70f10f2ae3c28f74 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Mon, 1 Dec 2025 23:27:50 +0100 Subject: [PATCH 08/18] Add test on evaluation with granularity = 'data_point' The new evaluation strategy has been used --- ats/tests/test_evaluators.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 49579fd..b8021fe 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -286,14 +286,7 @@ def test_calculate_model_scores(self): self.assertAlmostEqual(model_scores['false_positives_ratio'],0.14) def test_evaluate_point_granularity(self): - anomalies = ['step_uv'] - effects = [] - # series with 2880 data points - series_generator = HumiTempTimeseriesGenerator() - series_1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) - series_2 = 
series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects)
-        dataset = [series_1,series_2]
-        evaluator = Evaluator(test_data=dataset)
+        dataset = [self.series1, self.series2, self.series3]
         minmax1 = MinMaxAnomalyDetector()
         minmax2 = MinMaxAnomalyDetector()
         minmax3 = MinMaxAnomalyDetector()
         models={'detector_1': minmax1,
                 'detector_2': minmax2,
                 'detector_3': minmax3
         }
+        evaluator = Evaluator(test_data=dataset)
         evaluation_results = evaluator.evaluate(models=models,granularity='data_point')
-        # Evaluation_results:
-        # detector_1: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
-        # detector_2: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
-        # detector_3: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
-        self.assertIsInstance(evaluation_results,dict)
-        self.assertEqual(len(evaluation_results),3)
-        self.assertEqual(len(evaluation_results['detector_1']),2)
-        self.assertEqual(len(evaluation_results['detector_2']),2)
-        self.assertEqual(len(evaluation_results['detector_3']),2)
-        self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.000694444444444444)
-        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.000694444444444444)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],6)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/12)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21)

     def test_evaluate_variable_granularity(self):

From a83b7f78cefed0961e85f2982887a0b658d6458d Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Mon, 1 Dec 2025 23:29:56 +0100
Subject: [PATCH 09/18] Delete the argument "granularity" from "_calculate_model_scores()"

The new version of the function no longer has this argument
---
 ats/evaluators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index c6eaf07..3640d03 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -110,7 +110,7 @@ def evaluate(self,models={},granularity='data_point'):
                 if granularity == 'series':
                     single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i])

-            models_scores[model_name] = _calculate_model_scores(single_model_evaluation,granularity=granularity)
+            models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
             j+=1

         return models_scores

From 0f6bf6b0a84d574a026411c071a965aaf5c7b2df Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Mon, 1 Dec 2025 23:44:01 +0100
Subject: [PATCH 10/18] Manage zero divisions

About the evaluation on a single series:
Before, the calculation of the 'anomalies_ratio' value resulted in a zero
division for non-anomalous series. Now, the 'anomalies_ratio' value is
"None" for non-anomalous series.
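As an illustration, a minimal sketch of the per-series result after this
patch (hypothetical values, assuming a detector that flags nothing on a
series without labelled anomalies):

    # sketch only: keys as produced by the granularity evaluation helpers
    result = {'anomalies_count': 0,
              'anomalies_ratio': None,  # before this patch: ZeroDivisionError
              'false_positives_count': 0,
              'false_positives_ratio': 0.0}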
About the evaluation on a dataset:
Now, the 'anomalies_ratio' value is calculated by averaging only over
anomalous series
---
 ats/evaluators.py            | 18 ++++++++++++++----
 ats/tests/test_evaluators.py |  2 +-
 2 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 3640d03..614b8ba 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -57,14 +57,18 @@ def _calculate_model_scores(single_model_evaluation={}):
     false_positives_count = 0
     anomalies_ratio = 0
     false_positives_ratio = 0
+    anomalous_series_n = 0
     for sample in single_model_evaluation.keys():
         anomalies_count += single_model_evaluation[sample]['anomalies_count']
-        anomalies_ratio += single_model_evaluation[sample]['anomalies_ratio']
+        if single_model_evaluation[sample]['anomalies_ratio'] is not None:
+            anomalies_ratio += single_model_evaluation[sample]['anomalies_ratio']
+        else:
+            anomalous_series_n += 1
         false_positives_count += single_model_evaluation[sample]['false_positives_count']
         false_positives_ratio += single_model_evaluation[sample]['false_positives_ratio']

     model_scores['anomalies_count'] = anomalies_count
-    model_scores['anomalies_ratio'] = anomalies_ratio/len(single_model_evaluation)
+    model_scores['anomalies_ratio'] = anomalies_ratio/(len(single_model_evaluation) - anomalous_series_n)
     model_scores['false_positives_count'] = false_positives_count
     model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation)
@@ -159,7 +163,10 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
     one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None)
     one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor
     one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n
-    one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n
+    if total_inserted_anomalies_n:
+        one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n
+    else:
+        one_series_evaluation_result['anomalies_ratio'] = None
     return one_series_evaluation_result

 def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
@@ -187,7 +194,10 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
     one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None)
     one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor
     one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n
-    one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n
+    if total_inserted_anomalies_n:
+        one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n
+    else:
+        one_series_evaluation_result['anomalies_ratio'] = None
     return one_series_evaluation_result

 def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):

diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index b8021fe..b2fbe84 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -297,7 +297,7 @@ def test_evaluate_point_granularity(self):
         evaluator = Evaluator(test_data=dataset)
         evaluation_results = evaluator.evaluate(models=models,granularity='data_point')
self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],6) - self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/12) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/8) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21) From 0758d98675d2f6369994f167d7769968d20d740f Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Tue, 2 Dec 2025 00:12:19 +0100 Subject: [PATCH 11/18] Add test on evaluation with granularity='variable' --- ats/tests/test_evaluators.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index b2fbe84..184db71 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -302,14 +302,7 @@ def test_evaluate_point_granularity(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21) def test_evaluate_variable_granularity(self): - anomalies = ['step_uv'] - effects = [] - # series with 2880 data points - series_generator = HumiTempTimeseriesGenerator() - series_1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) - series_2 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) - dataset = [series_1,series_2] - evaluator = Evaluator(test_data=dataset) + dataset = [self.series1, self.series2, self.series3] minmax1 = MinMaxAnomalyDetector() minmax2 = MinMaxAnomalyDetector() minmax3 = MinMaxAnomalyDetector() @@ -317,19 +310,12 @@ def test_evaluate_variable_granularity(self): 'detector_2': minmax2, 'detector_3': minmax3 } + evaluator = Evaluator(test_data=dataset) evaluation_results = evaluator.evaluate(models=models,granularity='variable') - # Evaluation_results: - # detector_1: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444} - # detector_2: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444} - # detector_3: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444} - - self.assertIsInstance(evaluation_results,dict) - self.assertEqual(len(evaluation_results),3) - self.assertEqual(len(evaluation_results['detector_1']),2) - self.assertEqual(len(evaluation_results['detector_2']),2) - self.assertEqual(len(evaluation_results['detector_3']),2) - self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.000694444444444444) - self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.000694444444444444) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],7) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],25/48) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],5) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],31/126) def test_evaluate_series_granularity(self): anomalies = ['step_uv'] From 669c17665cf89768da62711b7992c27ea8c86682 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Tue, 2 Dec 2025 00:15:43 +0100 Subject: [PATCH 12/18] Add test on evaluation with granularity='series' --- ats/tests/test_evaluators.py | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 184db71..82e0fa6 100644 --- a/ats/tests/test_evaluators.py +++ 
b/ats/tests/test_evaluators.py @@ -318,15 +318,7 @@ def test_evaluate_variable_granularity(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],31/126) def test_evaluate_series_granularity(self): - anomalies = ['step_uv'] - effects = [] - series_generator = HumiTempTimeseriesGenerator() - # series_1 will be a true anomaly for the minmax - series_1 = series_generator.generate(include_effect_label=True, anomalies=anomalies,effects=effects) - # series_2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) - series_2 = series_generator.generate(include_effect_label=True, anomalies=[],effects=effects) - dataset = [series_1,series_2] - evaluator = Evaluator(test_data=dataset) + dataset = [self.series1, self.series2, self.series3] minmax1 = MinMaxAnomalyDetector() minmax2 = MinMaxAnomalyDetector() minmax3 = MinMaxAnomalyDetector() @@ -334,19 +326,12 @@ def test_evaluate_series_granularity(self): 'detector_2': minmax2, 'detector_3': minmax3 } + evaluator = Evaluator(test_data=dataset) evaluation_results = evaluator.evaluate(models=models,granularity='series') - # Evaluation_results: - # detector_1: {'step_uv': 0.5, 'false_positives': 0.5} - # detector_2: {'step_uv': 0.5, 'false_positives': 0.5} - # detector_3: {'step_uv': 0.5, 'false_positives': 0.5} - - self.assertIsInstance(evaluation_results,dict) - self.assertEqual(len(evaluation_results),3) - self.assertEqual(len(evaluation_results['detector_1']),2) - self.assertEqual(len(evaluation_results['detector_2']),2) - self.assertEqual(len(evaluation_results['detector_3']),2) - self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.5) - self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.5) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],2) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],2/3) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/3) def test_series_granularity_eval_with_non_detected_anomalies(self): effects = [] From 6990662f6a7060d038e85b5813b7a032830e5b2a Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Tue, 2 Dec 2025 00:17:40 +0100 Subject: [PATCH 13/18] Delete tests on old evaluation strategy --- ats/tests/test_evaluators.py | 42 ------------------------------------ 1 file changed, 42 deletions(-) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 82e0fa6..7e085b5 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -333,48 +333,6 @@ def test_evaluate_series_granularity(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/3) - def test_series_granularity_eval_with_non_detected_anomalies(self): - effects = [] - series_generator = HumiTempTimeseriesGenerator() - # series_1 will be a true anomaly for the minmax - series_1 = series_generator.generate(include_effect_label=True, anomalies=['step_uv'],effects=effects) - # series_2 will be a false positive for minmax (it sees always 2 anomalous data points for each variable) - series_2 = series_generator.generate(include_effect_label=True, anomalies=['pattern_uv'],effects=effects) - dataset = [series_1,series_2] - evaluator = Evaluator(test_data=dataset) - minmax1 = MinMaxAnomalyDetector() - minmax2 = MinMaxAnomalyDetector() - 
minmax3 = MinMaxAnomalyDetector()
-        models={'detector_1': minmax1,
-                'detector_2': minmax2,
-                'detector_3': minmax3
-        }
-        evaluation_results = evaluator.evaluate(models=models,granularity='series')
-        # Evaluation_results:
-        # detector_1: {'step_uv': 0.5, 'pattern_uv': 0.5, 'false_positives': 0.0}
-        # detector_2: {'step_uv': 0.5, 'pattern_uv': 0.5, 'false_positives': 0.0}
-        # detector_3: {'step_uv': 0.5, 'pattern_uv': 0.5, 'false_positives': 0.0}
-        self.assertIsInstance(evaluation_results,dict)
-        self.assertEqual(len(evaluation_results),3)
-        self.assertEqual(len(evaluation_results['detector_1']),3)
-        self.assertEqual(len(evaluation_results['detector_2']),3)
-        self.assertEqual(len(evaluation_results['detector_3']),3)
-        self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.5)
-        self.assertAlmostEqual(evaluation_results['detector_1']['pattern_uv'],0.5)
-        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.0)
-
-    def test_raised_error_evaluation_series_granularity(self):
-        anomalies = ['step_uv','spike_uv']
-        series_generator = HumiTempTimeseriesGenerator()
-        series = series_generator.generate(include_effect_label=True, anomalies=anomalies)
-        dataset = [series]
-        minmax = MinMaxAnomalyDetector()
-        evaluator = Evaluator(test_data=dataset)
-        try:
-            evaluation_result = evaluator.evaluate(models={'detector':minmax},granularity='series')
-        except Exception as e:
-            self.assertIsInstance(e,ValueError)
-
     def test_copy_dataset(self):
         series_generator = HumiTempTimeseriesGenerator()
         series_1 = series_generator.generate(include_effect_label=True, effects=['noise'])

From bddce755c411bb6dc3e3595f10849c39fb85b99d Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Tue, 2 Dec 2025 21:30:39 +0100
Subject: [PATCH 14/18] Change the way anomalous series in the dataset are counted

---
 ats/evaluators.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 614b8ba..edcc2cf 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -62,13 +62,12 @@ def _calculate_model_scores(single_model_evaluation={}):
         anomalies_count += single_model_evaluation[sample]['anomalies_count']
         if single_model_evaluation[sample]['anomalies_ratio'] is not None:
             anomalies_ratio += single_model_evaluation[sample]['anomalies_ratio']
-        else:
             anomalous_series_n += 1
         false_positives_count += single_model_evaluation[sample]['false_positives_count']
         false_positives_ratio += single_model_evaluation[sample]['false_positives_ratio']

     model_scores['anomalies_count'] = anomalies_count
-    model_scores['anomalies_ratio'] = anomalies_ratio/(len(single_model_evaluation) - anomalous_series_n)
+    model_scores['anomalies_ratio'] = anomalies_ratio/anomalous_series_n
     model_scores['false_positives_count'] = false_positives_count
     model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation)

From 328f185a59b8258319e0107742ebd910f8e2b2a9 Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Tue, 2 Dec 2025 21:50:58 +0100
Subject: [PATCH 15/18] Set "anomalies_ratio" to "None" in 2 particular cases

When the series is not anomalous, and when the dataset does not contain
anomalous series
---
 ats/evaluators.py            | 7 +++++--
 ats/tests/test_evaluators.py | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index edcc2cf..56d760d 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -67,7 +67,10 @@ def _calculate_model_scores(single_model_evaluation={}):
         false_positives_ratio += 
single_model_evaluation[sample]['false_positives_ratio'] model_scores['anomalies_count'] = anomalies_count - model_scores['anomalies_ratio'] = anomalies_ratio/anomalous_series_n + if anomalous_series_n: + model_scores['anomalies_ratio'] = anomalies_ratio/anomalous_series_n + else: + model_scores['anomalies_ratio'] = None model_scores['false_positives_count'] = false_positives_count model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation) @@ -215,6 +218,6 @@ def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): one_series_evaluation_result['false_positives_count'] = 1 if is_series_anomalous and not anomalies else 0 one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count'] one_series_evaluation_result['anomalies_count'] = 1 if is_series_anomalous and anomalies else 0 - one_series_evaluation_result['anomalies_ratio'] = one_series_evaluation_result['anomalies_count'] + one_series_evaluation_result['anomalies_ratio'] = one_series_evaluation_result['anomalies_count'] if anomalies else None return one_series_evaluation_result \ No newline at end of file diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 7e085b5..300c7c7 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -329,7 +329,7 @@ def test_evaluate_series_granularity(self): evaluator = Evaluator(test_data=dataset) evaluation_results = evaluator.evaluate(models=models,granularity='series') self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],2) - self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],2/3) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],2/2) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/3) @@ -423,7 +423,7 @@ def test_series_granularity_evaluation(self): evaluator1 = Evaluator(test_data=dataset1) evaluation_results = evaluator1.evaluate(models=models,granularity='series') self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],0) - self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],0) + self.assertIsNone(evaluation_results['detector_1']['anomalies_ratio']) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1) From ab5a1422a2ffb5287ac782ea3183882a841b2ecb Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Tue, 2 Dec 2025 22:54:36 +0100 Subject: [PATCH 16/18] Add check on available granularity --- ats/evaluators.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index 56d760d..b1b61eb 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -111,10 +111,12 @@ def evaluate(self,models={},granularity='data_point'): for i,sample_df in enumerate(flagged_dataset): if granularity == 'data_point': single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i]) - if granularity == 'variable': + elif granularity == 'variable': single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i]) - if granularity == 'series': + elif granularity == 'series': single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i]) + else: + raise 
ValueError(f'Unknown granularity {granularity}')

             models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
             j+=1

From 05af0f1e14b77d67e80faa9f93f9dbcc60330351 Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Fri, 5 Dec 2025 18:50:58 +0100
Subject: [PATCH 17/18] Change 'data_point' to 'point'

---
 ats/evaluators.py            | 4 ++--
 ats/tests/test_evaluators.py | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index b1b61eb..85981f6 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -88,7 +88,7 @@ def _copy_dataset(self,dataset,models):
         dataset_copies.append(dataset_copy)
         return dataset_copies

-    def evaluate(self,models={},granularity='data_point'):
+    def evaluate(self,models={},granularity='point'):
         if not models:
             raise ValueError('There are no models to evaluate')
         if not self.test_data:
             raise ValueError('There is no test data')
@@ -109,7 +109,7 @@ def evaluate(self,models={},granularity='data_point'):
             single_model_evaluation = {}
             flagged_dataset = _get_model_output(dataset_copies[j],model)
             for i,sample_df in enumerate(flagged_dataset):
-                if granularity == 'data_point':
+                if granularity == 'point':
                     single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i])
                 elif granularity == 'variable':

diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 300c7c7..e6a9de5 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -295,7 +295,7 @@ def test_evaluate_point_granularity(self):
                 'detector_3': minmax3
         }
         evaluator = Evaluator(test_data=dataset)
-        evaluation_results = evaluator.evaluate(models=models,granularity='data_point')
+        evaluation_results = evaluator.evaluate(models=models,granularity='point')
         self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],6)
         self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/8)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21)
@@ -381,7 +381,7 @@ def test_point_granularity_evaluation(self):
         minmax1 = MinMaxAnomalyDetector()
         models={'detector_1': minmax1}
         evaluator = Evaluator(test_data=dataset)
-        evaluation_results = evaluator.evaluate(models=models,granularity='data_point')
+        evaluation_results = evaluator.evaluate(models=models,granularity='point')
         self.assertIn('detector_1',evaluation_results.keys())
         self.assertIn('anomalies_count',evaluation_results['detector_1'].keys())
         self.assertIn('anomalies_ratio',evaluation_results['detector_1'].keys())
         self.assertIn('false_positives_count',evaluation_results['detector_1'].keys())
         self.assertIn('false_positives_ratio',evaluation_results['detector_1'].keys())
@@ -395,7 +395,7 @@ def test_point_granularity_evaluation(self):

         dataset1 = [self.series2]
         evaluator1 = Evaluator(test_data=dataset1)
-        evaluation_results = evaluator1.evaluate(models=models,granularity='data_point')
+        evaluation_results = evaluator1.evaluate(models=models,granularity='point')
         self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],3)
         self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],3/4)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/7)

From c15939aefc0fdc4b238c26e4242406b6bb9eead2 Mon Sep 17 00:00:00 2001
From: Agata Benvegna
Date: Fri, 5 Dec 2025 18:58:56 +0100
Subject: [PATCH 18/18] Add "strategy" argument to function "evaluate()"

This argument sets the evaluation strategy. Supported values are "flags"
(default) and "events" (not implemented)
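A minimal usage sketch (assuming an evaluator and a models dict built as in
the tests, e.g. an Evaluator over some test_data and a dict of detectors):

    # 'flags' is the default; any other value raises NotImplementedError
    results = evaluator.evaluate(models=models, granularity='point', strategy='flags')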
---
 ats/evaluators.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 85981f6..6a354e0 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -88,7 +88,9 @@ def _copy_dataset(self,dataset,models):
         dataset_copies.append(dataset_copy)
         return dataset_copies

-    def evaluate(self,models={},granularity='point'):
+    def evaluate(self,models={},granularity='point',strategy='flags'):
+        if strategy != 'flags':
+            raise NotImplementedError(f'Evaluation strategy {strategy} is not implemented')
         if not models:
             raise ValueError('There are no models to evaluate')
         if not self.test_data: