From 5656759425da1af19015ed6b0406d4e41085837f Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Thu, 4 Dec 2025 21:24:08 +0100
Subject: [PATCH 01/15] Add the "breakdown" argument in "evaluate()"

---
 ats/evaluators.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 6a354e0..a7b5701 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -91,6 +91,7 @@ def _copy_dataset(self,dataset,models):
     def evaluate(self,models={},granularity='point',strategy='flags'):
         if strategy != 'flags':
             raise NotImplementedError(f'Evaluation strategy {strategy} is not implemented')
+
         if not models:
             raise ValueError('There are no models to evaluate')
         if not self.test_data:

From cd3216320837896bfbdc30725333270dce089b00 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Thu, 4 Dec 2025 22:25:28 +0100
Subject: [PATCH 02/15] Add "breakdown" argument in
 "_variable_granularity_evaluation()"

---
 ats/evaluators.py            | 21 ++++++++++++++-------
 ats/tests/test_evaluators.py | 30 ++++++++++++++++++++++++++++++
 2 files changed, 44 insertions(+), 7 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index a7b5701..00b19f4 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -115,7 +115,7 @@ def evaluate(self,models={},granularity='point',strategy='flags'):
                 if granularity == 'point':
                     single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i])
                 elif granularity == 'variable':
-                    single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i])
+                    single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown)
                 elif granularity == 'series':
                     single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i])
                 else:
@@ -143,7 +143,7 @@ def _get_model_output(dataset,model):
 
     return flagged_dataset
 
-def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
+def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakdown=False):
     one_series_evaluation_result = {}
     flag_columns_n = len(flagged_timeseries_df.filter(like='anomaly').columns)
     variables_n = len(flagged_timeseries_df.columns) - flag_columns_n
@@ -153,7 +153,8 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
 
     total_inserted_anomalies_n = 0
     total_detected_anomalies_n = 0
-    detection_counts_by_anomaly_type = {}
+    breakdown_info = {}
+    false_positives_count = 0
     for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items():
         if anomaly is not None:
             total_inserted_anomalies_n += frequency
@@ -164,17 +165,23 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
                     anomaly_count += flagged_timeseries_df.loc[timestamp,column]
         if anomaly is not None:
             total_detected_anomalies_n += anomaly_count
-        detection_counts_by_anomaly_type[anomaly] = anomaly_count
+            breakdown_info[anomaly + '_anomaly' + '_count'] = anomaly_count
+            breakdown_info[anomaly + '_anomaly' + '_ratio'] = anomaly_count/(frequency * variables_n)
+        else:
+            false_positives_count +=1
 
     total_inserted_anomalies_n *= variables_n
-    one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None)
-    one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor
+    one_series_evaluation_result['false_positives_count'] = false_positives_count
+    one_series_evaluation_result['false_positives_ratio'] = false_positives_count/normalization_factor
     one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n
     if total_inserted_anomalies_n:
         one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n
     else:
         one_series_evaluation_result['anomalies_ratio'] = None
-    return one_series_evaluation_result
+    if breakdown:
+        return one_series_evaluation_result | breakdown_info
+    else:
+        return one_series_evaluation_result
 
 def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
     one_series_evaluation_result = {}
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index e6a9de5..2d01729 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -375,6 +375,36 @@ def test_variable_granularity_evaluation(self):
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/(7*2))
 
+    def test_variable_granularity_evaluation_with_breakdown(self):
+        formatted_series,anomaly_labels = _format_for_anomaly_detector(self.series1)
+        minmax1 = MinMaxAnomalyDetector()
+        flagged_series = _get_model_output([formatted_series],minmax1)
+        evaluation_results = _variable_granularity_evaluation(flagged_series[0],anomaly_labels,breakdown=True)
+
+        self.assertIn('anomalies_count',evaluation_results.keys())
+        self.assertIn('anomalies_ratio',evaluation_results.keys())
+        self.assertIn('false_positives_count',evaluation_results.keys())
+        self.assertIn('false_positives_ratio',evaluation_results.keys())
+
+        self.assertIn('anomaly_1_anomaly_count',evaluation_results.keys())
+        self.assertIn('anomaly_1_anomaly_ratio',evaluation_results.keys())
+        self.assertIn('anomaly_2_anomaly_count',evaluation_results.keys())
+        self.assertIn('anomaly_2_anomaly_ratio',evaluation_results.keys())
+
+        self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_count'],3)
+        self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_ratio'],3/4)
+        self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_count'],1)
+        self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_ratio'],1/2)
+
+        formatted_series1,anomaly_labels1 = _format_for_anomaly_detector(self.series3)
+        flagged_series1 = _get_model_output([formatted_series1],minmax1)
+        evaluation_results1 = _variable_granularity_evaluation(flagged_series1[0],anomaly_labels1,breakdown=True)
+
+        self.assertNotIn('anomaly_1_anomaly_count',evaluation_results1.keys())
+        self.assertNotIn('anomaly_1_anomaly_ratio',evaluation_results1.keys())
+        self.assertNotIn('anomaly_2_anomaly_count',evaluation_results1.keys())
+        self.assertNotIn('anomaly_2_anomaly_ratio',evaluation_results1.keys())
+
     def test_point_granularity_evaluation(self):
         dataset = [self.series1]
         evaluator = Evaluator(test_data=dataset)

From b106258215123ff29aa4e537a063af1e3ecd37cc Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Thu, 4 Dec 2025 22:40:16 +0100
Subject: [PATCH 03/15] Add "breakdown" argument in
 "_point_granularity_evaluation()"

---
 ats/evaluators.py            | 19 +++++++++++++------
 ats/tests/test_evaluators.py | 21 +++++++++++++++++++++
 2 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 00b19f4..9974390 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -183,13 +183,14 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,bre
     else:
         return one_series_evaluation_result
 
-def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
+def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakdown=False):
     one_series_evaluation_result = {}
     normalization_factor = len(flagged_timeseries_df)
 
     total_inserted_anomalies_n = 0
     total_detected_anomalies_n = 0
-    detection_counts_by_anomaly_type = {}
+    breakdown_info = {}
+    false_positives_count = 0
     for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items():
         if anomaly is not None:
             total_inserted_anomalies_n += frequency
@@ -202,17 +203,23 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
                         break
         if anomaly is not None:
             total_detected_anomalies_n += anomaly_count
-        detection_counts_by_anomaly_type[anomaly] = anomaly_count
+            breakdown_info[anomaly + '_anomaly_count'] = anomaly_count
+            breakdown_info[anomaly + '_anomaly_ratio'] = anomaly_count/frequency
+        else:
+            false_positives_count += 1
         one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor
 
-    one_series_evaluation_result['false_positives_count'] = detection_counts_by_anomaly_type.pop(None)
-    one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']/normalization_factor
+    one_series_evaluation_result['false_positives_count'] = false_positives_count
+    one_series_evaluation_result['false_positives_ratio'] = false_positives_count/normalization_factor
     one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n
     if total_inserted_anomalies_n:
         one_series_evaluation_result['anomalies_ratio'] = total_detected_anomalies_n/total_inserted_anomalies_n
     else:
         one_series_evaluation_result['anomalies_ratio'] = None
-    return one_series_evaluation_result
+    if breakdown:
+        return one_series_evaluation_result | breakdown_info
+    else:
+        return one_series_evaluation_result
 
 def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
     anomalies = []
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 2d01729..93555fb 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -431,6 +431,27 @@ def test_point_granularity_evaluation(self):
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1/7)
 
+    def test_point_granularity_evaluation_with_breakdown(self):
+        formatted_series,anomaly_labels = _format_for_anomaly_detector(self.series1)
+        minmax1 = MinMaxAnomalyDetector()
+        flagged_series = _get_model_output([formatted_series],minmax1)
+        evaluation_results = _point_granularity_evaluation(flagged_series[0],anomaly_labels,breakdown=True)
+
+        self.assertIn('anomalies_count',evaluation_results.keys())
+        self.assertIn('anomalies_ratio',evaluation_results.keys())
+        self.assertIn('false_positives_count',evaluation_results.keys())
+        self.assertIn('false_positives_ratio',evaluation_results.keys())
+
+        self.assertIn('anomaly_1_anomaly_count',evaluation_results.keys())
+        self.assertIn('anomaly_1_anomaly_ratio',evaluation_results.keys())
+        self.assertIn('anomaly_2_anomaly_count',evaluation_results.keys())
+        self.assertIn('anomaly_2_anomaly_ratio',evaluation_results.keys())
+
+        self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_count'],2)
+        self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_ratio'],2/2)
+        self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_count'],1)
+        self.assertAlmostEqual(evaluation_results['anomaly_2_anomaly_ratio'],1/1)
+
     def test_series_granularity_evaluation(self):
         dataset = [self.series1]
         evaluator = Evaluator(test_data=dataset)

From 36da9c115b4b5e658396b08f378546610fea3877 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Thu, 4 Dec 2025 23:19:27 +0100
Subject: [PATCH 04/15] Add "breakdown" argument in
 "_series_granularity_evaluation()"

---
 ats/evaluators.py            | 14 ++++++++++++--
 ats/tests/test_evaluators.py | 24 ++++++++++++++++++++++++
 2 files changed, 36 insertions(+), 2 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 9974390..d76b8ef 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -221,22 +221,32 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakd
     else:
         return one_series_evaluation_result
 
-def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
+def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakdown=False):
     anomalies = []
     for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items():
         if anomaly is not None:
             anomalies.append(anomaly)
+    if len(anomalies) != 1 and breakdown:
+        raise ValueError('Series must have only 1 anomaly type for breakdown in mode granularity = "series"')
+    else:
+        inserted_anomaly = anomalies[0]
 
     one_series_evaluation_result = {}
+    breakdown_info = {}
     is_series_anomalous = 0
     for timestamp in flagged_timeseries_df.index:
         for column in flagged_timeseries_df.filter(like='anomaly').columns:
             if flagged_timeseries_df.loc[timestamp,column]:
                 is_series_anomalous = 1
+                breakdown_info[inserted_anomaly + '_anomaly_count'] = 1
+                breakdown_info[inserted_anomaly + '_anomaly_ratio'] = 1
                 break
     one_series_evaluation_result['false_positives_count'] = 1 if is_series_anomalous and not anomalies else 0
     one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']
     one_series_evaluation_result['anomalies_count'] = 1 if is_series_anomalous and anomalies else 0
     one_series_evaluation_result['anomalies_ratio'] = one_series_evaluation_result['anomalies_count'] if anomalies else None
 
-    return one_series_evaluation_result
\ No newline at end of file
+    if breakdown:
+        return one_series_evaluation_result | breakdown_info
+    else:
+        return one_series_evaluation_result
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 93555fb..5facf92 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -478,6 +478,30 @@ def test_series_granularity_evaluation(self):
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],1)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],1)
 
+    def test_series_granularity_evaluation_with_breakdown(self):
+        series = generate_timeseries_df(entries=3, variables=2)
+        series['anomaly_label'] = [None,None,'anomaly_1']
+        formatted_series,anomaly_labels = _format_for_anomaly_detector(series)
+        minmax1 = MinMaxAnomalyDetector()
+        flagged_series = _get_model_output([formatted_series],minmax1)
+        evaluation_results = _series_granularity_evaluation(flagged_series[0],anomaly_labels,breakdown=True)
+
+        self.assertIn('anomalies_count',evaluation_results.keys())
+        self.assertIn('anomalies_ratio',evaluation_results.keys())
+        self.assertIn('false_positives_count',evaluation_results.keys())
+        self.assertIn('false_positives_ratio',evaluation_results.keys())
+        self.assertIn('anomaly_1_anomaly_count',evaluation_results.keys())
+        self.assertIn('anomaly_1_anomaly_ratio',evaluation_results.keys())
+        self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_count'],1)
+        self.assertAlmostEqual(evaluation_results['anomaly_1_anomaly_ratio'],1)
+
+        formatted_series1,anomaly_labels1 = _format_for_anomaly_detector(self.series1)
+        flagged_series1 = _get_model_output([formatted_series1],minmax1)
+        try:
+            evaluation_results = _point_granularity_evaluation(flagged_series1[0],anomaly_labels1,breakdown=True)
+        except Exception as e:
+            self.assertIsInstance(e,ValueError)
+
     def test_double_evaluator(self):
         anomalies = ['step_uv']
         effects = []

From e3c27bc84b895c67059bccf2d350285000ff70f1 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 00:25:46 +0100
Subject: [PATCH 05/15] Add "_get_breakdown_info()" function

---
 ats/evaluators.py | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index d76b8ef..0060603 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -47,11 +47,6 @@ def evaluate_anomaly_detector(evaluated_timeseries_df, anomaly_labels, details=F
 
 
 def _calculate_model_scores(single_model_evaluation={}):
-    dataset_anomalies = set()
-    for sample in single_model_evaluation.keys():
-        sample_anomalies = set(single_model_evaluation[sample].keys())
-        dataset_anomalies.update(sample_anomalies)
-
     model_scores = {}
     anomalies_count = 0
     false_positives_count = 0
@@ -73,9 +68,11 @@ def _calculate_model_scores(single_model_evaluation={}):
         model_scores['anomalies_ratio'] = None
     model_scores['false_positives_count'] = false_positives_count
     model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation)
-
     return model_scores
 
+    def _get_breakdown_info(single_model_evaluation={}):
+        pass
+
 
 class Evaluator():
     def __init__(self,test_data):
@@ -120,8 +117,10 @@ def evaluate(self,models={},granularity='point',strategy='flags'):
                     single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i])
                 else:
                     raise ValueError(f'Unknown granularity {granularity}')
-                
-            models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
+            if breakdown:    
+                models_scores[model_name] = _calculate_model_scores(single_model_evaluation) | _get_breakdown_info(single_model_evaluation)
+            else:
+                models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
             j+=1
 
         return models_scores

From c87d9c8bf93314bf25b1834ec0469d66b6b8b1f8 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 00:26:52 +0100
Subject: [PATCH 06/15] Pass "breakdown" argument to internal functions
 inside "evaluate()"

---
 ats/evaluators.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 0060603..5a30d6c 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -114,7 +114,7 @@ def evaluate(self,models={},granularity='point',strategy='flags'):
                 elif granularity == 'variable':
                     single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown)
                 elif granularity == 'series':
-                    single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i])
+                    single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown)
                 else:
                     raise ValueError(f'Unknown granularity {granularity}')
             if breakdown:    

From d44f78b2b3385c871a55020efb22c2f94569defb Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 10:45:08 +0100
Subject: [PATCH 07/15] Add "_get_breakdown_info()" function

---
 ats/evaluators.py            | 29 +++++++++++++++++++++++++--
 ats/tests/test_evaluators.py | 39 ++++++++++++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 2 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 5a30d6c..1f93f59 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -70,8 +70,33 @@ def _calculate_model_scores(single_model_evaluation={}):
     model_scores['false_positives_ratio'] = false_positives_ratio/len(single_model_evaluation)
     return model_scores
 
-    def _get_breakdown_info(single_model_evaluation={}):
-        pass
+def _get_breakdown_info(single_model_evaluation={}):
+    if 'anomalies_count' in single_model_evaluation.keys():
+        del single_model_evaluation['anomalies_count']
+    if 'anomalies_ratio' in single_model_evaluation.keys():
+        del single_model_evaluation['anomalies_ratio']
+    if 'false_positives_count' in single_model_evaluation.keys():
+        del single_model_evaluation['false_positives_count']
+    if 'false_positives_ratio' in single_model_evaluation.keys():
+        del single_model_evaluation['false_positives_ratio']
+
+    breakdown_info = {}
+    # how many series in the dataset have that anomaly type
+    anomaly_series_count_by_type = {}
+    for sample, sample_evaluation in single_model_evaluation.items():
+        for key in sample_evaluation.keys():
+            if key in breakdown_info.keys():
+                anomaly_series_count_by_type[key] +=1
+                breakdown_info[key] += sample_evaluation[key]
+            else:
+                anomaly_series_count_by_type[key] =1
+                breakdown_info[key] = sample_evaluation[key]
+
+    for key in breakdown_info.keys():
+        if '_ratio' in key:
+            breakdown_info[key] /= anomaly_series_count_by_type[key]
+
+    return breakdown_info
 
 
 class Evaluator():
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 5facf92..a9dee32 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -9,6 +9,7 @@
 from ..evaluators import _variable_granularity_evaluation
 from ..evaluators import _point_granularity_evaluation
 from ..evaluators import _series_granularity_evaluation
+from ..evaluators import _get_breakdown_info
 import unittest
 import pandas as pd
 import random as rnd
@@ -502,6 +503,44 @@ def test_series_granularity_evaluation_with_breakdown(self):
         except Exception as e:
             self.assertIsInstance(e,ValueError)
 
+    def test_get_breakdown_info(self):
+        single_model_evaluation = { 'sample_1': {'anomalies_count': 3, 'anomalies_ratio': 1.5,
+                                                    'false_positives_count': 1, 
+                                                    'false_positives_ratio': 0.14,
+                                                    'spike_anomaly_count': 1,
+                                                    'spike_anomaly_ratio': 0.5},
+                                    'sample_2': {'anomalies_count': 3, 'anomalies_ratio': 1.5,
+                                                    'false_positives_count': 1, 
+                                                    'false_positives_ratio': 0.14,
+                                                    'spike_anomaly_count': 1,
+                                                    'spike_anomaly_ratio': 0.5,
+                                                    'step_anomaly_count': 2,
+                                                    'step_anomaly_ratio': 2/3
+                                                    },
+                                    'sample_3': {'anomalies_count': 3, 'anomalies_ratio': 1.5,
+                                                    'false_positives_count': 1,
+                                                    'false_positives_ratio': 0.14,
+                                                    'step_anomaly_count': 3,
+                                                    'step_anomaly_ratio': 1,
+                                                    'pattern_anomaly_count': 2,
+                                                    'pattern_anomaly_ratio': 0.5
+                                                    }
+        }
+        breakdown = _get_breakdown_info(single_model_evaluation)
+        self.assertIn('spike_anomaly_count',breakdown.keys())
+        self.assertIn('spike_anomaly_ratio',breakdown.keys())
+        self.assertIn('step_anomaly_count',breakdown.keys())
+        self.assertIn('step_anomaly_ratio',breakdown.keys())
+        self.assertIn('pattern_anomaly_count',breakdown.keys())
+        self.assertIn('pattern_anomaly_ratio',breakdown.keys())
+
+        self.assertAlmostEqual(breakdown['spike_anomaly_count'],2)
+        self.assertAlmostEqual(breakdown['spike_anomaly_ratio'],1/2)
+        self.assertAlmostEqual(breakdown['step_anomaly_count'],5)
+        self.assertAlmostEqual(breakdown['step_anomaly_ratio'],5/6)
+        self.assertAlmostEqual(breakdown['pattern_anomaly_count'],2)
+        self.assertAlmostEqual(breakdown['pattern_anomaly_ratio'],0.5)
+
     def test_double_evaluator(self):
         anomalies = ['step_uv']
         effects = []

From 987cde484cfaa33fddab5c0dfad7105974199860 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 10:57:15 +0100
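Subject: [PATCH 08/15] Guard against empty anomaly list in
 "_series_granularity_evaluation()"

Also drop the breakdown merge from "evaluate()"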

---
 ats/evaluators.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 1f93f59..0beeb29 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -142,10 +142,8 @@ def evaluate(self,models={},granularity='point',strategy='flags'):
                     single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown)
                 else:
                     raise ValueError(f'Unknown granularity {granularity}')
-            if breakdown:    
-                models_scores[model_name] = _calculate_model_scores(single_model_evaluation) | _get_breakdown_info(single_model_evaluation)
-            else:
-                models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
+
+            models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
             j+=1
 
         return models_scores
@@ -252,8 +250,6 @@ def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,break
             anomalies.append(anomaly)
     if len(anomalies) != 1 and breakdown:
         raise ValueError('Series must have only 1 anomaly type for breakdown in mode granularity = "series"')
-    else:
-        inserted_anomaly = anomalies[0]
 
     one_series_evaluation_result = {}
     breakdown_info = {}
@@ -262,8 +258,10 @@ def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,break
         for column in flagged_timeseries_df.filter(like='anomaly').columns:
             if flagged_timeseries_df.loc[timestamp,column]:
                 is_series_anomalous = 1
-                breakdown_info[inserted_anomaly + '_anomaly_count'] = 1
-                breakdown_info[inserted_anomaly + '_anomaly_ratio'] = 1
+                if anomalies:
+                    inserted_anomaly = anomalies[0]
+                    breakdown_info[inserted_anomaly + '_anomaly_count'] = 1
+                    breakdown_info[inserted_anomaly + '_anomaly_ratio'] = 1
                 break
     one_series_evaluation_result['false_positives_count'] = 1 if is_series_anomalous and not anomalies else 0
     one_series_evaluation_result['false_positives_ratio'] = one_series_evaluation_result['false_positives_count']

From 802bfe42bbf9b3316aee0870929305d65b72f398 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 11:03:33 +0100
Subject: [PATCH 09/15] Fix errors in evaluation with variable granularity

Now false positives and anomalies are counted correctly
---
 ats/evaluators.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index 0beeb29..fef71fe 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -184,13 +184,14 @@ def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,bre
         for timestamp in flagged_timeseries_df.index:
             if anomaly_labels_df[timestamp] == anomaly:
                 for column in flagged_timeseries_df.filter(like='anomaly').columns:
-                    anomaly_count += flagged_timeseries_df.loc[timestamp,column]
+                    if anomaly is not None:
+                        anomaly_count += flagged_timeseries_df.loc[timestamp,column]
+                    else:
+                        false_positives_count += flagged_timeseries_df.loc[timestamp,column]
         if anomaly is not None:
             total_detected_anomalies_n += anomaly_count
             breakdown_info[anomaly + '_anomaly' + '_count'] = anomaly_count
             breakdown_info[anomaly + '_anomaly' + '_ratio'] = anomaly_count/(frequency * variables_n)
-        else:
-            false_positives_count +=1
 
     total_inserted_anomalies_n *= variables_n
     one_series_evaluation_result['false_positives_count'] = false_positives_count

From d99d777c9e302e499fe4736809f611306715e669 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 11:07:37 +0100
Subject: [PATCH 10/15] Fix error on evaluation with point granularity

Now false positives and anomalies are counted correctly
---
 ats/evaluators.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index fef71fe..dbab69d 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -222,14 +222,16 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakd
             if anomaly_labels_df[timestamp] == anomaly:
                 for column in flagged_timeseries_df.filter(like='anomaly').columns:
                     if flagged_timeseries_df.loc[timestamp,column]:
-                        anomaly_count += 1
+                        if anomaly is not None:
+                            anomaly_count += 1
+                        else:
+                            false_positives_count += 1
                         break
         if anomaly is not None:
             total_detected_anomalies_n += anomaly_count
             breakdown_info[anomaly + '_anomaly_count'] = anomaly_count
             breakdown_info[anomaly + '_anomaly_ratio'] = anomaly_count/frequency
-        else:
-            false_positives_count += 1
+
         one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor
 
     one_series_evaluation_result['false_positives_count'] = false_positives_count

From bf81c83f57641e874dfd326fd522a0329e73f74e Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 11:28:18 +0100
Subject: [PATCH 11/15] Add test on evaluation with variable granularity and
 breakdown

This test exposed an error in "_get_breakdown_info()", which is now fixed
---
 ats/evaluators.py            | 24 +++++++++++++++---------
 ats/tests/test_evaluators.py | 27 +++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index dbab69d..b3d1a8f 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -71,14 +71,15 @@ def _calculate_model_scores(single_model_evaluation={}):
     return model_scores
 
 def _get_breakdown_info(single_model_evaluation={}):
-    if 'anomalies_count' in single_model_evaluation.keys():
-        del single_model_evaluation['anomalies_count']
-    if 'anomalies_ratio' in single_model_evaluation.keys():
-        del single_model_evaluation['anomalies_ratio']
-    if 'false_positives_count' in single_model_evaluation.keys():
-        del single_model_evaluation['false_positives_count']
-    if 'false_positives_ratio' in single_model_evaluation.keys():
-        del single_model_evaluation['false_positives_ratio']
+    for sample in single_model_evaluation.keys():
+        if 'anomalies_count' in single_model_evaluation[sample].keys():
+            del single_model_evaluation[sample]['anomalies_count']
+        if 'anomalies_ratio' in single_model_evaluation[sample].keys():
+            del single_model_evaluation[sample]['anomalies_ratio']
+        if 'false_positives_count' in single_model_evaluation[sample].keys():
+            del single_model_evaluation[sample]['false_positives_count']
+        if 'false_positives_ratio' in single_model_evaluation[sample].keys():
+            del single_model_evaluation[sample]['false_positives_ratio']
 
     breakdown_info = {}
     # how many series in the dataset have that anomaly type
@@ -143,7 +144,12 @@ def evaluate(self,models={},granularity='point',strategy='flags'):
                 else:
                     raise ValueError(f'Unknown granularity {granularity}')
 
-            models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
+            if breakdown:
+                scores = _calculate_model_scores(single_model_evaluation)
+                breakdown_info = _get_breakdown_info(single_model_evaluation)
+                models_scores[model_name] = scores | breakdown_info
+            else:
+                models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
             j+=1
 
         return models_scores
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index a9dee32..2ae59c6 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -541,6 +541,33 @@ def test_get_breakdown_info(self):
         self.assertAlmostEqual(breakdown['pattern_anomaly_count'],2)
         self.assertAlmostEqual(breakdown['pattern_anomaly_ratio'],0.5)
 
+    def test_variable_granularity_eval_with_breakdown(self):
+        dataset = [self.series1, self.series2, self.series3]
+        minmax1 = MinMaxAnomalyDetector()
+        minmax2 = MinMaxAnomalyDetector()
+        minmax3 = MinMaxAnomalyDetector()
+        models={'detector_1': minmax1,
+                'detector_2': minmax2,
+                'detector_3': minmax3
+                }
+        evaluator = Evaluator(test_data=dataset)
+        evaluation_results = evaluator.evaluate(models=models,granularity='variable',breakdown=True)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],7)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],25/48)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],5)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],31/126)
+
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],5)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],13/24)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2)
+
+    def test_point_granularity_eval_with_breakdown(self):
+        pass
+
+    def test_series_granularity_eval_with_breakdown(self):
+        pass
+
     def test_double_evaluator(self):
         anomalies = ['step_uv']
         effects = []

From 53b4e8b7dd4bcf44861b29174c7e9a93d4ca9841 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Fri, 5 Dec 2025 18:17:38 +0100
Subject: [PATCH 12/15] Add test on evaluation with point granularity and
 breakdown

This test showed a problem: some keys of the dictionary "breakdown_info" are None
---
 ats/tests/test_evaluators.py | 21 ++++++++++++++++++---
 1 file changed, 18 insertions(+), 3 deletions(-)

diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 2ae59c6..10f6af0 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -563,10 +563,25 @@ def test_variable_granularity_eval_with_breakdown(self):
         self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2)
 
     def test_point_granularity_eval_with_breakdown(self):
-        pass
+        dataset = [self.series1, self.series2, self.series3]
+        minmax1 = MinMaxAnomalyDetector()
+        minmax2 = MinMaxAnomalyDetector()
+        minmax3 = MinMaxAnomalyDetector()
+        models={'detector_1': minmax1,
+                'detector_2': minmax2,
+                'detector_3': minmax3
+                }
+        evaluator = Evaluator(test_data=dataset)
+        evaluation_results = evaluator.evaluate(models=models,granularity='data_point',breakdown=True)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],6)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/8)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21)
 
-    def test_series_granularity_eval_with_breakdown(self):
-        pass
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],5)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],13/24)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2)
 
     def test_double_evaluator(self):
         anomalies = ['step_uv']

From 6137fad716678e2aa21224de6460636f7aba94db Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Sat, 6 Dec 2025 19:05:21 +0100
Subject: [PATCH 13/15] Fix errors due to rebase

"breakdown" argument was missing in "evaluate()" and 'data_point' was not changed to 'point'
after the rebase
---
 ats/evaluators.py            | 2 +-
 ats/tests/test_evaluators.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index b3d1a8f..a6f0b7a 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -111,7 +111,7 @@ def _copy_dataset(self,dataset,models):
             dataset_copies.append(dataset_copy)
         return dataset_copies
 
-    def evaluate(self,models={},granularity='point',strategy='flags'):
+    def evaluate(self,models={},granularity='point',strategy='flags',breakdown=False):
         if strategy != 'flags':
             raise NotImplementedError(f'Evaluation strategy {strategy} is not implemented')
 
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 10f6af0..1c9cb6d 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -572,7 +572,7 @@ def test_point_granularity_eval_with_breakdown(self):
                 'detector_3': minmax3
                 }
         evaluator = Evaluator(test_data=dataset)
-        evaluation_results = evaluator.evaluate(models=models,granularity='data_point',breakdown=True)
+        evaluation_results = evaluator.evaluate(models=models,granularity='point',breakdown=True)
         self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],6)
         self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],7/8)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4)

From ac8d9c2fe0974baedc2364316f2178c102d47146 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Sat, 6 Dec 2025 19:33:54 +0100
Subject: [PATCH 14/15] Fix a bug in "_point_granularity_evaluation()"

---
 ats/evaluators.py            | 4 +---
 ats/tests/test_evaluators.py | 6 +++---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/ats/evaluators.py b/ats/evaluators.py
index a6f0b7a..3131576 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -136,7 +136,7 @@ def evaluate(self,models={},granularity='point',strategy='flags',breakdown=False
             flagged_dataset = _get_model_output(dataset_copies[j],model)
             for i,sample_df in enumerate(flagged_dataset):
                 if granularity == 'point':
-                    single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i])
+                    single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i],breakdown=breakdown)
                 elif granularity == 'variable':
                     single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i], breakdown = breakdown)
                 elif granularity == 'series':
@@ -238,8 +238,6 @@ def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df,breakd
             breakdown_info[anomaly + '_anomaly_count'] = anomaly_count
             breakdown_info[anomaly + '_anomaly_ratio'] = anomaly_count/frequency
 
-        one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor
-
     one_series_evaluation_result['false_positives_count'] = false_positives_count
     one_series_evaluation_result['false_positives_ratio'] = false_positives_count/normalization_factor
     one_series_evaluation_result['anomalies_count'] = total_detected_anomalies_n
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 1c9cb6d..1f1bad9 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -578,10 +578,10 @@ def test_point_granularity_eval_with_breakdown(self):
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],4)
         self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],8/21)
 
-        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],5)
-        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],13/24)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],4)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],5/6)
         self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2)
-        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1/2)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1)
 
     def test_double_evaluator(self):
         anomalies = ['step_uv']

From 7ec213065b05ad97c360a4b397dd2b80cdeda111 Mon Sep 17 00:00:00 2001
From: Agata Benvegna <Agataben23@gmail.com>
Date: Sat, 6 Dec 2025 19:43:27 +0100
Subject: [PATCH 15/15] Add test on evaluation with series granularity and
 breakdown

---
 ats/tests/test_evaluators.py | 34 ++++++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index 1f1bad9..bf859ac 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -583,6 +583,40 @@ def test_point_granularity_eval_with_breakdown(self):
         self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],2)
         self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1)
 
+    def test_series_granularity_eval_with_breakdown(self):
+        series_1 = generate_timeseries_df(entries=3, variables=2)
+        series_1['anomaly_label'] = [None,None,'anomaly_1']
+        series_2 = generate_timeseries_df(entries=3, variables=2)
+        series_2['anomaly_label'] = ['anomaly_1',None,None]
+        series_3 = generate_timeseries_df(entries=3, variables=2)
+        series_3['anomaly_label'] = [None,'anomaly_2',None]
+        dataset = [series_1, series_2, series_3]
+        minmax1 = MinMaxAnomalyDetector()
+        minmax2 = MinMaxAnomalyDetector()
+        minmax3 = MinMaxAnomalyDetector()
+        models={'detector_1': minmax1,
+                'detector_2': minmax2,
+                'detector_3': minmax3
+                }
+        evaluator = Evaluator(test_data=dataset)
+        evaluation_results = evaluator.evaluate(models=models,granularity='series',breakdown=True)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_count'],3)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomalies_ratio'],1)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_count'],0)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives_ratio'],0)
+
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_count'],2)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_1_anomaly_ratio'],1)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_count'],1)
+        self.assertAlmostEqual(evaluation_results['detector_1']['anomaly_2_anomaly_ratio'],1)
+
+        try:
+            dataset = [self.series1, self.series2, self.series3]
+            evaluator = Evaluator(test_data=dataset)
+            evaluation_results = evaluator.evaluate(models=models,granularity='series',breakdown=True)
+        except Exception as e:
+            self.assertIsInstance(e,ValueError)
+
     def test_double_evaluator(self):
         anomalies = ['step_uv']
         effects = []