From 5656759425da1af19015ed6b0406d4e41085837f Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Thu, 4 Dec 2025 21:24:08 +0100 Subject: [PATCH 01/15] Add the "breakdown" argument in "evaluate()" --- ats/evaluators.py | 1 + 1 file changed, 1 insertion(+) diff --git a/ats/evaluators.py b/ats/evaluators.py index 6a354e0..a7b5701 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -91,6 +91,7 @@ def _copy_dataset(self,dataset,models): def evaluate(self,models={},granularity='point',strategy='flags'): if strategy != 'flags': raise NotImplementedError(f'Evaluation strategy {strategy} is not implemented') + if not models: raise ValueError('There are no models to evaluate') if not self.test_data: From cd3216320837896bfbdc30725333270dce089b00 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Thu, 4 Dec 2025 22:25:28 +0100 Subject: [PATCH 02/15] Add "breakdown" argument in "_variable_granularity_evaluation()" --- ats/evaluators.py | 21 ++++++++++++++------- ats/tests/test_evaluators.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index a7b5701..00b19f4 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -115,7 +115,7 @@ def evaluate(self,models={},granularity='point',strategy='flags'): if granularity == 'point': single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i]) elif granularity == 'variable': - single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i]) + single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown) elif granularity == 'series': single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i]) else: @@ -143,7 +143,7 @@ def _get_model_output(dataset,model): return flagged_dataset -def 
_variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): +def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakdown=False): one_series_evaluation_result = {} flag_columns_n = len(flagged_timeseries_df.filter(like='anomaly').columns) variables_n = len(flagged_timeseries_df.columns) - flag_columns_n @@ -153,7 +153,8 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): total_inserted_anomalies_n = 0 total_detected_anomalies_n = 0 - detection_counts_by_anomaly_type = {} + breakdown_info = {} + false_positives_count = 0 for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items(): if anomaly is not None: total_inserted_anomalies_n += frequency @@ -164,17 +165,23 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): anomaly_count += flagged_timeseries_df.loc[timestamp,column] if anomaly is not None: total_detected_anomalies_n += anomaly_count - detection_counts_by_anomaly_type[anomaly] = anomaly_count + breakdown_info[anomaly + '_anomaly' + '_count'] = anomaly_count + breakdown_info[anomaly + '_anomaly' + '_ratio'] = anomaly_count/(frequency * variables_n) + else: + false_positives_count +=1 total_inserted_anomalies_n *= variables_n - one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None) - one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor + one_series_evaluation_result['false_positives_count'] = false_positives_count + one_series_evaluation_result['false_positives_ratio'] = false_positives_count/normalization_factor one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n if total_inserted_anomalies_n: one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n else: one_series_evaluation_result['anomalies_ratio'] = None - return one_series_evaluation_result + if 
breakdown: + return one_series_evaluation_result | breakdown_info + else: + return one_series_evaluation_result def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): one_series_evaluation_result = {} diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index e6a9de5..2d01729 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -375,6 +375,36 @@ def test_variable_granularity_evaluation(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/(7*2)) + def test_variable_granularity_evaluation_with_breakdown(self): + formatted_series,anomaly_labels = _format_for_anomaly_detector(self.series1) + minmax1 = MinMaxAnomalyDetector() + flagged_series = _get_model_output([formatted_series],minmax1) + evaluation_results = _variable_granularity_evaluation(flagged_series[0],anomaly_labels,breakdown=True) + + self.assertIn('anomalies_count',evaluation_results.keys()) + self.assertIn('anomalies_ratio',evaluation_results.keys()) + self.assertIn('false_positives_count',evaluation_results.keys()) + self.assertIn('false_positives_ratio',evaluation_results.keys()) + + self.assertIn('anomaly_1_anomaly_count',evaluation_results.keys()) + self.assertIn('anomaly_1_anomaly_ratio',evaluation_results.keys()) + self.assertIn('anomaly_2_anomaly_count',evaluation_results.keys()) + self.assertIn('anomaly_2_anomaly_ratio',evaluation_results.keys()) + + self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_count'],3) + self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_ratio'],3/4) + self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_count'],1) + self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_ratio'],1/2) + + formatted_series1,anomaly_labels1 = _format_for_anomaly_detector(self.series3) + flagged_series1 = _get_model_output([formatted_series1],minmax1) + evaluation_results1 = 
_variable_granularity_evaluation(flagged_series1[0],anomaly_labels1,breakdown=True) + + self.assertNotIn('anomaly_1_anomaly_count',evaluation_results1.keys()) + self.assertNotIn('anomaly_1_anomaly_ratio',evaluation_results1.keys()) + self.assertNotIn('anomaly_2_anomaly_count',evaluation_results1.keys()) + self.assertNotIn('anomaly_2_anomaly_ratio',evaluation_results1.keys()) + def test_point_granularity_evaluation(self): dataset = [self.series1] evaluator = Evaluator(test_data=dataset) From b106258215123ff29aa4e537a063af1e3ecd37cc Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Thu, 4 Dec 2025 22:40:16 +0100 Subject: [PATCH 03/15] Add "breakdown" argument in "_point_granularity_evaluation()" --- ats/evaluators.py | 19 +++++++++++++------ ats/tests/test_evaluators.py | 21 +++++++++++++++++++++ 2 files changed, 34 insertions(+), 6 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index 00b19f4..9974390 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -183,13 +183,14 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,bre else: return one_series_evaluation_result -def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): +def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakdown=False): one_series_evaluation_result = {} normalization_factor = len(flagged_timeseries_df) total_inserted_anomalies_n = 0 total_detected_anomalies_n = 0 - detection_counts_by_anomaly_type = {} + breakdown_info = {} + false_positives_count = 0 for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items(): if anomaly is not None: total_inserted_anomalies_n += frequency @@ -202,17 +203,23 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): break if anomaly is not None: total_detected_anomalies_n += anomaly_count - detection_counts_by_anomaly_type[anomaly] = anomaly_count + breakdown_info[anomaly + '_anomaly_count'] = anomaly_count + breakdown_info[anomaly 
+ '_anomaly_ratio'] = anomaly_count/frequency + else: + false_positives_count += 1 one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor - one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None) - one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor + one_series_evaluation_result['false_positives_count'] = false_positives_count + one_series_evaluation_result['false_positives_ratio'] = false_positives_count/normalization_factor one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n if total_inserted_anomalies_n: one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n else: one_series_evaluation_result['anomalies_ratio'] = None - return one_series_evaluation_result + if breakdown: + return one_series_evaluation_result | breakdown_info + else: + return one_series_evaluation_result def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): anomalies = [] diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 2d01729..93555fb 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -431,6 +431,27 @@ def test_point_granularity_evaluation(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/7) + def test_point_granularity_evaluation_with_breakdown(self): + formatted_series,anomaly_labels = _format_for_anomaly_detector(self.series1) + minmax1 = MinMaxAnomalyDetector() + flagged_series = _get_model_output([formatted_series],minmax1) + evaluation_results = _point_granularity_evaluation(flagged_series[0],anomaly_labels,breakdown=True) + + self.assertIn('anomalies_count',evaluation_results.keys()) + self.assertIn('anomalies_ratio',evaluation_results.keys()) + 
self.assertIn('false_positives_count',evaluation_results.keys()) + self.assertIn('false_positives_ratio',evaluation_results.keys()) + + self.assertIn('anomaly_1_anomaly_count',evaluation_results.keys()) + self.assertIn('anomaly_1_anomaly_ratio',evaluation_results.keys()) + self.assertIn('anomaly_2_anomaly_count',evaluation_results.keys()) + self.assertIn('anomaly_2_anomaly_ratio',evaluation_results.keys()) + + self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_count'],2) + self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_ratio'],2/2) + self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_count'],1) + self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_ratio'],1/1) + def test_series_granularity_evaluation(self): dataset = [self.series1] evaluator = Evaluator(test_data=dataset) From 36da9c115b4b5e658396b08f378546610fea3877 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Thu, 4 Dec 2025 23:19:27 +0100 Subject: [PATCH 04/15] Add "breakdown" argument in "_series_granularity_evaluation()" --- ats/evaluators.py | 14 ++++++++++++-- ats/tests/test_evaluators.py | 24 ++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 2 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index 9974390..d76b8ef 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -221,22 +221,32 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakd else: return one_series_evaluation_result -def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df): +def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakdown=False): anomalies = [] for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items(): if anomaly is not None: anomalies.append(anomaly) + if len(anomalies) != 1 and breakdown: + raise ValueError('Series must have only 1 anomaly type for breakdown in mode granularity = "series"') + else: + inserted_anomaly = anomalies[0] one_series_evaluation_result = 
{} + breakdown_info = {} is_series_anomalous = 0 for timestamp in flagged_timeseries_df.index: for column in flagged_timeseries_df.filter(like='anomaly').columns: if flagged_timeseries_df.loc[timestamp,column]: is_series_anomalous = 1 + breakdown_info[inserted_anomaly + '_anomaly_count'] = 1 + breakdown_info[inserted_anomaly + '_anomaly_ratio'] = 1 break one_series_evaluation_result['false_positives_count'] = 1 if is_series_anomalous and not anomalies else 0 one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count'] one_series_evaluation_result['anomalies_count'] = 1 if is_series_anomalous and anomalies else 0 one_series_evaluation_result['anomalies_ratio'] = one_series_evaluation_result['anomalies_count'] if anomalies else None - return one_series_evaluation_result \ No newline at end of file + if breakdown: + return one_series_evaluation_result | breakdown_info + else: + return one_series_evaluation_result diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 93555fb..5facf92 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -478,6 +478,30 @@ def test_series_granularity_evaluation(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1) + def test_series_granularity_evaluation_with_breakdown(self): + series = generate_timeseries_df(entries=3, variables=2) + series['anomaly_label'] = [None,None,'anomaly_1'] + formatted_series,anomaly_labels = _format_for_anomaly_detector(series) + minmax1 = MinMaxAnomalyDetector() + flagged_series = _get_model_output([formatted_series],minmax1) + evaluation_results = _series_granularity_evaluation(flagged_series[0],anomaly_labels,breakdown=True) + + self.assertIn('anomalies_count',evaluation_results.keys()) + self.assertIn('anomalies_ratio',evaluation_results.keys()) + 
self.assertIn('false_positives_count',evaluation_results.keys()) + self.assertIn('false_positives_ratio',evaluation_results.keys()) + self.assertIn('anomaly_1_anomaly_count',evaluation_results.keys()) + self.assertIn('anomaly_1_anomaly_ratio',evaluation_results.keys()) + self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_count'],1) + self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_ratio'],1) + + formatted_series1,anomaly_labels1 = _format_for_anomaly_detector(self.series1) + flagged_series1 = _get_model_output([formatted_series1],minmax1) + try: + evaluation_results = _point_granularity_evaluation(flagged_series1[0],anomaly_labels1,breakdown=True) + except Exception as e: + self.assertIsInstance(e,ValueError) + def test_double_evaluator(self): anomalies = ['step_uv'] effects = [] From e3c27bc84b895c67059bccf2d350285000ff70f1 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 00:25:46 +0100 Subject: [PATCH 05/15] Add "_get_breakdown_info()" function --- ats/evaluators.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index d76b8ef..0060603 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -47,11 +47,6 @@ def evaluate_anomaly_detector(evaluated_timeseries_df, anomaly_labels, details=F def _calculate_model_scores(single_model_evaluation={}): - dataset_anomalies = set() - for sample in single_model_evaluation.keys(): - sample_anomalies = set(single_model_evaluation[sample].keys()) - dataset_anomalies.update(sample_anomalies) - model_scores = {} anomalies_count = 0 false_positives_count = 0 @@ -73,9 +68,11 @@ def _calculate_model_scores(single_model_evaluation={}): model_scores['anomalies_ratio'] = None model_scores['false_positives_count'] = false_positives_count model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation) - return model_scores + def _get_breakdown_info(single_model_evaluation={}): + pass + class 
Evaluator(): def __init__(self,test_data): @@ -120,8 +117,10 @@ def evaluate(self,models={},granularity='point',strategy='flags'): single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i]) else: raise ValueError(f'Unknown granularity {granularity}') - - models_scores[model_name] = _calculate_model_scores(single_model_evaluation) + if breakdown: + models_scores[model_name] = _calculate_model_scores(single_model_evaluation) | _get_breakdown_info(single_model_evaluation) + else: + models_scores[model_name] = _calculate_model_scores(single_model_evaluation) j+=1 return models_scores From c87d9c8bf93314bf25b1834ec0469d66b6b8b1f8 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 00:26:52 +0100 Subject: [PATCH 06/15] Add "breakdown" argument in using internal functions inside "evaluate()" --- ats/evaluators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index 0060603..5a30d6c 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -114,7 +114,7 @@ def evaluate(self,models={},granularity='point',strategy='flags'): elif granularity == 'variable': single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown) elif granularity == 'series': - single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i]) + single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown) else: raise ValueError(f'Unknown granularity {granularity}') if breakdown: From d44f78b2b3385c871a55020efb22c2f94569defb Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 10:45:08 +0100 Subject: [PATCH 07/15] Add "_get_breakdown_info()" function --- ats/evaluators.py | 29 +++++++++++++++++++++++++-- ats/tests/test_evaluators.py | 39 ++++++++++++++++++++++++++++++++++++ 2 files changed, 66 
insertions(+), 2 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index 5a30d6c..1f93f59 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -70,8 +70,33 @@ def _calculate_model_scores(single_model_evaluation={}): model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation) return model_scores - def _get_breakdown_info(single_model_evaluation={}): - pass +def _get_breakdown_info(single_model_evaluation={}): + if 'anomalies_count' in single_model_evaluation.keys(): + del single_model_evaluation['anomalies_count'] + if 'anomalies_ratio' in single_model_evaluation.keys(): + del single_model_evaluation['anomalies_ratio'] + if 'false_positives_count' in single_model_evaluation.keys(): + del single_model_evaluation['false_positives_count'] + if 'false_positives_ratio' in single_model_evaluation.keys(): + del single_model_evaluation['false_positives_ratio'] + + breakdown_info = {} + # how many series in the dataset have that anomaly type + anomaly_series_count_by_type = {} + for sample, sample_evaluation in single_model_evaluation.items(): + for key in sample_evaluation.keys(): + if key in breakdown_info.keys(): + anomaly_series_count_by_type[key] +=1 + breakdown_info[key] += sample_evaluation[key] + else: + anomaly_series_count_by_type[key] =1 + breakdown_info[key] = sample_evaluation[key] + + for key in breakdown_info.keys(): + if '_ratio' in key: + breakdown_info[key] /= anomaly_series_count_by_type[key] + + return breakdown_info class Evaluator(): diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 5facf92..a9dee32 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -9,6 +9,7 @@ from ..evaluators import _variable_granularity_evaluation from ..evaluators import _point_granularity_evaluation from ..evaluators import _series_granularity_evaluation +from ..evaluators import _get_breakdown_info import unittest import pandas as pd import random as rnd @@ -502,6 +503,44 
@@ def test_series_granularity_evaluation_with_breakdown(self): except Exception as e: self.assertIsInstance(e,ValueError) + def test_get_breakdown_info(self): + single_model_evaluation = { 'sample_1': {'anomalies_count': 3, 'anomalies_ratio': 1.5, + 'false_positives_count': 1, + 'false_positives_ratio': 0.14, + 'spike_anomaly_count': 1, + 'spike_anomaly_ratio': 0.5}, + 'sample_2': {'anomalies_count': 3, 'anomalies_ratio': 1.5, + 'false_positives_count': 1, + 'false_positives_ratio': 0.14, + 'spike_anomaly_count': 1, + 'spike_anomaly_ratio': 0.5, + 'step_anomaly_count': 2, + 'step_anomaly_ratio': 2/3 + }, + 'sample_3': {'anomalies_count': 3, 'anomalies_ratio': 1.5, + 'false_positives_count': 1, + 'false_positives_ratio': 0.14, + 'step_anomaly_count': 3, + 'step_anomaly_ratio': 1, + 'pattern_anomaly_count': 2, + 'pattern_anomaly_ratio': 0.5 + } + } + breakdown = _get_breakdown_info(single_model_evaluation) + self.assertIn('spike_anomaly_count',breakdown.keys()) + self.assertIn('spike_anomaly_ratio',breakdown.keys()) + self.assertIn('step_anomaly_count',breakdown.keys()) + self.assertIn('step_anomaly_ratio',breakdown.keys()) + self.assertIn('pattern_anomaly_count',breakdown.keys()) + self.assertIn('pattern_anomaly_ratio',breakdown.keys()) + + self.assertAlmostEqual(breakdown['spike_anomaly_count'],2) + self.assertAlmostEqual(breakdown['spike_anomaly_ratio'],1/2) + self.assertAlmostEqual(breakdown['step_anomaly_count'],5) + self.assertAlmostEqual(breakdown['step_anomaly_ratio'],5/6) + self.assertAlmostEqual(breakdown['pattern_anomaly_count'],2) + self.assertAlmostEqual(breakdown['pattern_anomaly_ratio'],0.5) + def test_double_evaluator(self): anomalies = ['step_uv'] effects = [] From 987cde484cfaa33fddab5c0dfad7105974199860 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 10:57:15 +0100 Subject: [PATCH 08/15] Fix an error --- ats/evaluators.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/ats/evaluators.py 
b/ats/evaluators.py index 1f93f59..0beeb29 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -142,10 +142,8 @@ def evaluate(self,models={},granularity='point',strategy='flags'): single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown) else: raise ValueError(f'Unknown granularity {granularity}') - if breakdown: - models_scores[model_name] = _calculate_model_scores(single_model_evaluation) | _get_breakdown_info(single_model_evaluation) - else: - models_scores[model_name] = _calculate_model_scores(single_model_evaluation) + + models_scores[model_name] = _calculate_model_scores(single_model_evaluation) j+=1 return models_scores @@ -252,8 +250,6 @@ def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,break anomalies.append(anomaly) if len(anomalies) != 1 and breakdown: raise ValueError('Series must have only 1 anomaly type for breakdown in mode granularity = "series"') - else: - inserted_anomaly = anomalies[0] one_series_evaluation_result = {} breakdown_info = {} @@ -262,8 +258,10 @@ def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,break for column in flagged_timeseries_df.filter(like='anomaly').columns: if flagged_timeseries_df.loc[timestamp,column]: is_series_anomalous = 1 - breakdown_info[inserted_anomaly + '_anomaly_count'] = 1 - breakdown_info[inserted_anomaly + '_anomaly_ratio'] = 1 + if anomalies: + inserted_anomaly = anomalies[0] + breakdown_info[inserted_anomaly + '_anomaly_count'] = 1 + breakdown_info[inserted_anomaly + '_anomaly_ratio'] = 1 break one_series_evaluation_result['false_positives_count'] = 1 if is_series_anomalous and not anomalies else 0 one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count'] From 802bfe42bbf9b3316aee0870929305d65b72f398 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 11:03:33 +0100 Subject: [PATCH 09/15] Fix errors in evaluation 
with variable granularity Now false positives and anomalies are counted correctly --- ats/evaluators.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index 0beeb29..fef71fe 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -184,13 +184,14 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,bre for timestamp in flagged_timeseries_df.index: if anomaly_labels_df[timestamp] == anomaly: for column in flagged_timeseries_df.filter(like='anomaly').columns: - anomaly_count += flagged_timeseries_df.loc[timestamp,column] + if anomaly is not None: + anomaly_count += flagged_timeseries_df.loc[timestamp,column] + else: + false_positives_count += flagged_timeseries_df.loc[timestamp,column] if anomaly is not None: total_detected_anomalies_n += anomaly_count breakdown_info[anomaly + '_anomaly' + '_count'] = anomaly_count breakdown_info[anomaly + '_anomaly' + '_ratio'] = anomaly_count/(frequency * variables_n) - else: - false_positives_count +=1 total_inserted_anomalies_n *= variables_n one_series_evaluation_result['false_positives_count'] = false_positives_count From d99d777c9e302e499fe4736809f611306715e669 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 11:07:37 +0100 Subject: [PATCH 10/15] Fix error on evaluation with point granularity Now false positives and anomalies are counted correctly --- ats/evaluators.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index fef71fe..dbab69d 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -222,14 +222,16 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakd if anomaly_labels_df[timestamp] == anomaly: for column in flagged_timeseries_df.filter(like='anomaly').columns: if flagged_timeseries_df.loc[timestamp,column]: - anomaly_count += 1 + if anomaly is not None: + anomaly_count += 1 + else: + false_positives_count += 1 
break if anomaly is not None: total_detected_anomalies_n += anomaly_count breakdown_info[anomaly + '_anomaly_count'] = anomaly_count breakdown_info[anomaly + '_anomaly_ratio'] = anomaly_count/frequency - else: - false_positives_count += 1 + one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor one_series_evaluation_result['false_positives_count'] = false_positives_count From bf81c83f57641e874dfd326fd522a0329e73f74e Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 11:28:18 +0100 Subject: [PATCH 11/15] Add test on evaluation with variable granularity and breakdown This test showed an error in "_get_breakdown_info()" now fixed --- ats/evaluators.py | 24 +++++++++++++++--------- ats/tests/test_evaluators.py | 27 +++++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 9 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index dbab69d..b3d1a8f 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -71,14 +71,15 @@ def _calculate_model_scores(single_model_evaluation={}): return model_scores def _get_breakdown_info(single_model_evaluation={}): - if 'anomalies_count' in single_model_evaluation.keys(): - del single_model_evaluation['anomalies_count'] - if 'anomalies_ratio' in single_model_evaluation.keys(): - del single_model_evaluation['anomalies_ratio'] - if 'false_positives_count' in single_model_evaluation.keys(): - del single_model_evaluation['false_positives_count'] - if 'false_positives_ratio' in single_model_evaluation.keys(): - del single_model_evaluation['false_positives_ratio'] + for sample in single_model_evaluation.keys(): + if 'anomalies_count' in single_model_evaluation[sample].keys(): + del single_model_evaluation[sample]['anomalies_count'] + if 'anomalies_ratio' in single_model_evaluation[sample].keys(): + del single_model_evaluation[sample]['anomalies_ratio'] + if 'false_positives_count' in single_model_evaluation[sample].keys(): + del single_model_evaluation[sample]['false_positives_count'] + 
if 'false_positives_ratio' in single_model_evaluation[sample].keys(): + del single_model_evaluation[sample]['false_positives_ratio'] breakdown_info = {} # how many series in the dataset have that anomaly type @@ -143,7 +144,12 @@ def evaluate(self,models={},granularity='point',strategy='flags'): else: raise ValueError(f'Unknown granularity {granularity}') - models_scores[model_name] = _calculate_model_scores(single_model_evaluation) + if breakdown: + scores = _calculate_model_scores(single_model_evaluation) + breakdown_info = _get_breakdown_info(single_model_evaluation) + models_scores[model_name] = scores | breakdown_info + else: + models_scores[model_name] = _calculate_model_scores(single_model_evaluation) j+=1 return models_scores diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index a9dee32..2ae59c6 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -541,6 +541,33 @@ def test_get_breakdown_info(self): self.assertAlmostEqual(breakdown['pattern_anomaly_count'],2) self.assertAlmostEqual(breakdown['pattern_anomaly_ratio'],0.5) + def test_variable_granularity_eval_with_breakdown(self): + dataset = [self.series1, self.series2, self.series3] + minmax1 = MinMaxAnomalyDetector() + minmax2 = MinMaxAnomalyDetector() + minmax3 = MinMaxAnomalyDetector() + models={'detector_1': minmax1, + 'detector_2': minmax2, + 'detector_3': minmax3 + } + evaluator = Evaluator(test_data=dataset) + evaluation_results = evaluator.evaluate(models=models,granularity='variable',breakdown=True) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],7) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],25/48) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],5) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],31/126) + + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],5) + 
self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],13/24) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2) + + def test_point_granularity_eval_with_breakdown(self): + pass + + def test_series_granularity_eval_with_breakdown(self): + pass + def test_double_evaluator(self): anomalies = ['step_uv'] effects = [] From 53b4e8b7dd4bcf44861b29174c7e9a93d4ca9841 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Fri, 5 Dec 2025 18:17:38 +0100 Subject: [PATCH 12/15] Add test on evaluation with point granularity and breakdown This test showed a problem: some keys of the dictionary "breakdown_info" are None --- ats/tests/test_evaluators.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 2ae59c6..10f6af0 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -563,10 +563,25 @@ def test_variable_granularity_eval_with_breakdown(self): self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2) def test_point_granularity_eval_with_breakdown(self): - pass + dataset = [self.series1, self.series2, self.series3] + minmax1 = MinMaxAnomalyDetector() + minmax2 = MinMaxAnomalyDetector() + minmax3 = MinMaxAnomalyDetector() + models={'detector_1': minmax1, + 'detector_2': minmax2, + 'detector_3': minmax3 + } + evaluator = Evaluator(test_data=dataset) + evaluation_results = evaluator.evaluate(models=models,granularity='data_point',breakdown=True) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],6) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/8) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4) + 
self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21) - def test_series_granularity_eval_with_breakdown(self): - pass + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],5) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],13/24) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2) def test_double_evaluator(self): anomalies = ['step_uv'] From 6137fad716678e2aa21224de6460636f7aba94db Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Sat, 6 Dec 2025 19:05:21 +0100 Subject: [PATCH 13/15] Fix errors due to rebase "breakdown" argument was missing in "evaluate()" and 'data_point' was not changed to 'point' after the rebase --- ats/evaluators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index b3d1a8f..a6f0b7a 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -111,7 +111,7 @@ def _copy_dataset(self,dataset,models): dataset_copies.append(dataset_copy) return dataset_copies - def evaluate(self,models={},granularity='point',strategy='flags'): + def evaluate(self,models={},granularity='point',strategy='flags',breakdown=False): if strategy != 'flags': raise NotImplementedError(f'Evaluation strategy {strategy} is not implemented') diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 10f6af0..1c9cb6d 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -572,7 +572,7 @@ def test_point_granularity_eval_with_breakdown(self): 'detector_3': minmax3 } evaluator = Evaluator(test_data=dataset) - evaluation_results = evaluator.evaluate(models=models,granularity='data_point',breakdown=True) + evaluation_results = evaluator.evaluate(models=models,granularity='point',breakdown=True) 
self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],6) self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/8) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4) From ac8d9c2fe0974baedc2364316f2178c102d47146 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Sat, 6 Dec 2025 19:33:54 +0100 Subject: [PATCH 14/15] Fix a bug in "_point_granularity_evaluation()" --- ats/evaluators.py | 4 +--- ats/tests/test_evaluators.py | 6 +++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/ats/evaluators.py b/ats/evaluators.py index a6f0b7a..3131576 100644 --- a/ats/evaluators.py +++ b/ats/evaluators.py @@ -136,7 +136,7 @@ def evaluate(self,models={},granularity='point',strategy='flags',breakdown=False flagged_dataset = _get_model_output(dataset_copies[j],model) for i,sample_df in enumerate(flagged_dataset): if granularity == 'point': - single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i]) + single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i],breakdown=breakdown) elif granularity == 'variable': single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown) elif granularity == 'series': @@ -238,8 +238,6 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakd breakdown_info[anomaly + '_anomaly_count'] = anomaly_count breakdown_info[anomaly + '_anomaly_ratio'] = anomaly_count/frequency - one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor - one_series_evaluation_result['false_positives_count'] = false_positives_count one_series_evaluation_result['false_positives_ratio'] = false_positives_count/normalization_factor one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py 
index 1c9cb6d..1f1bad9 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -578,10 +578,10 @@ def test_point_granularity_eval_with_breakdown(self): self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4) self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21) - self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],5) - self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],13/24) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],4) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],5/6) self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2) - self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1) def test_double_evaluator(self): anomalies = ['step_uv'] From 7ec213065b05ad97c360a4b397dd2b80cdeda111 Mon Sep 17 00:00:00 2001 From: Agata Benvegna Date: Sat, 6 Dec 2025 19:43:27 +0100 Subject: [PATCH 15/15] Add test on evaluation with series granularity and breakdown --- ats/tests/test_evaluators.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py index 1f1bad9..bf859ac 100644 --- a/ats/tests/test_evaluators.py +++ b/ats/tests/test_evaluators.py @@ -583,6 +583,40 @@ def test_point_granularity_eval_with_breakdown(self): self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2) self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1) + def test_series_granularity_eval_with_breakdown(self): + series_1 = generate_timeseries_df(entries=3, variables=2) + series_1['anomaly_label'] = [None,None,'anomaly_1'] + series_2 = generate_timeseries_df(entries=3, variables=2) + 
series_2['anomaly_label'] = ['anomaly_1',None,None] + series_3 = generate_timeseries_df(entries=3, variables=2) + series_3['anomaly_label'] = [None,'anomaly_2',None] + dataset = [series_1, series_2, series_3] + minmax1 = MinMaxAnomalyDetector() + minmax2 = MinMaxAnomalyDetector() + minmax3 = MinMaxAnomalyDetector() + models={'detector_1': minmax1, + 'detector_2': minmax2, + 'detector_3': minmax3 + } + evaluator = Evaluator(test_data=dataset) + evaluation_results = evaluator.evaluate(models=models,granularity='series',breakdown=True) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],3) + self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],0) + self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],0) + + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],2) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],1) + self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1) + + try: + dataset = [self.series1, self.series2, self.series3] + evaluator = Evaluator(test_data=dataset) + evaluation_results = evaluator.evaluate(models=models,granularity='series',breakdown=True) + except Exception as e: + self.assertIsInstance(e,ValueError) + def test_double_evaluator(self): anomalies = ['step_uv'] effects = []