diff --git a/ats/evaluators.py b/ats/evaluators.py
index 4bbce18..56187b9 100644
--- a/ats/evaluators.py
+++ b/ats/evaluators.py
@@ -46,27 +46,29 @@ def evaluate_anomaly_detector(evaluated_timeseries_df, anomaly_labels, details=F
     return evaluation_results


-def _calculate_model_scores(single_model_evaluation={}):
-    anomalies = list(single_model_evaluation['sample_1'].keys())
-    samples_n = len(single_model_evaluation)
-    detections_per_anomaly = {}
-    avg_detections_per_anomaly = {}
+def _calculate_model_scores(single_model_evaluation={},granularity='data_point'):
+    dataset_anomalies = set()
+    for sample in single_model_evaluation.keys():
+        sample_anomalies = set(single_model_evaluation[sample].keys())
+        dataset_anomalies.update(sample_anomalies)

-    for anomaly in anomalies:
-        detections_per_anomaly[anomaly] = 0
+    anomaly_scores = {}
+    for anomaly in dataset_anomalies:
+        anomaly_scores[anomaly] = 0
+    if 'false_positives' not in dataset_anomalies:
+        anomaly_scores['false_positives'] = 0.0

-    for sample in single_model_evaluation.keys():
-        for anomaly in single_model_evaluation[sample].keys():
-            # TODO: evaluate_anomaly_detector and calculate_model_scores are redundant
-            if single_model_evaluation[sample][anomaly] and anomaly != 'false_positives':
-                detections_per_anomaly[anomaly] +=1
-            elif anomaly == 'false_positives':
-                detections_per_anomaly[anomaly] +=single_model_evaluation[sample][anomaly]
+    for anomaly in dataset_anomalies:
+        for sample in single_model_evaluation.keys():
+            if anomaly in single_model_evaluation[sample].keys():
+                anomaly_scores[anomaly] += single_model_evaluation[sample][anomaly]

-    for anomaly,counts in detections_per_anomaly.items():
-        avg_detections_per_anomaly[anomaly] = counts/samples_n if anomaly != 'false_positives' else counts
+    if granularity == 'series':
+        samples_n = len(single_model_evaluation)
+        for key in anomaly_scores.keys():
+            anomaly_scores[key] /= samples_n

-    return avg_detections_per_anomaly
+    return anomaly_scores


 class Evaluator():
@@ -80,7 +82,7 @@ def _copy_dataset(self,dataset,models):
             dataset_copies.append(dataset_copy)
         return dataset_copies

-    def evaluate(self,models={}):
+    def evaluate(self,models={},granularity='data_point'):
         if not models:
             raise ValueError('There are no models to evaluate')
         if not self.test_data:
@@ -101,8 +103,14 @@ def evaluate(self,models={}):
             single_model_evaluation = {}
             flagged_dataset = _get_model_output(dataset_copies[j],model)
             for i,sample_df in enumerate(flagged_dataset):
-                single_model_evaluation[f'sample_{i+1}'] = evaluate_anomaly_detector(sample_df,anomaly_labels_list[i])
-            models_scores[model_name] = _calculate_model_scores(single_model_evaluation)
+                if granularity == 'data_point':
+                    single_model_evaluation[f'sample_{i+1}'] = _point_granularity_evaluation(sample_df,anomaly_labels_list[i])
+                if granularity == 'variable':
+                    single_model_evaluation[f'sample_{i+1}'] = _variable_granularity_evaluation(sample_df,anomaly_labels_list[i])
+                if granularity == 'series':
+                    single_model_evaluation[f'sample_{i+1}'] = _series_granularity_evaluation(sample_df,anomaly_labels_list[i])
+
+            models_scores[model_name] = _calculate_model_scores(single_model_evaluation,granularity=granularity)
             j+=1
         return models_scores

@@ -123,3 +131,64 @@ def _get_model_output(dataset,model):
         flagged_dataset.append(flagged_series)

     return flagged_dataset
+
+def _variable_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
+    one_series_evaluation_result = {}
+    flag_columns_n = len(flagged_timeseries_df.filter(like='anomaly').columns)
+    variables_n = len(flagged_timeseries_df.columns) - flag_columns_n
+    if variables_n != 1 and variables_n != flag_columns_n:
+        raise ValueError('Variable granularity is not for this model')
+    normalization_factor = variables_n * len(flagged_timeseries_df)
+
+    for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items():
+        anomaly_count = 0
+        for timestamp in flagged_timeseries_df.index:
+            if anomaly_labels_df[timestamp] == anomaly:
+                for column in flagged_timeseries_df.filter(like='anomaly').columns:
+                    anomaly_count += flagged_timeseries_df.loc[timestamp,column]
+        one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor
+
+    one_series_evaluation_result['false_positives'] = one_series_evaluation_result.pop(None)
+    return one_series_evaluation_result
+
+def _point_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
+    one_series_evaluation_result = {}
+    normalization_factor = len(flagged_timeseries_df)
+
+    for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items():
+        anomaly_count = 0
+        for timestamp in flagged_timeseries_df.index:
+            if anomaly_labels_df[timestamp] == anomaly:
+                for column in flagged_timeseries_df.filter(like='anomaly').columns:
+                    if flagged_timeseries_df.loc[timestamp,column]:
+                        anomaly_count += 1
+                        break
+        one_series_evaluation_result[anomaly] = anomaly_count / normalization_factor
+
+    one_series_evaluation_result['false_positives'] = one_series_evaluation_result.pop(None)
+    return one_series_evaluation_result
+
+def _series_granularity_evaluation(flagged_timeseries_df,anomaly_labels_df):
+    anomalies = []
+    for anomaly,frequency in anomaly_labels_df.value_counts(dropna=False).items():
+        if anomaly is not None:
+            anomalies.append(anomaly)
+    anomalies_n = len(anomalies)
+    if anomalies_n > 1:
+        raise ValueError('Evaluation with series granularity supports series with only one anomaly')
+
+    one_series_evaluation_result = {}
+    is_series_anomalous = 0
+    for timestamp in flagged_timeseries_df.index:
+        for column in flagged_timeseries_df.filter(like='anomaly').columns:
+            if flagged_timeseries_df.loc[timestamp,column]:
+                is_series_anomalous = 1
+                break
+    if is_series_anomalous and not anomalies:
+        one_series_evaluation_result['false_positives'] = 1
+    elif is_series_anomalous and anomalies:
+        one_series_evaluation_result[anomalies[0]] = 1
+    elif not is_series_anomalous and anomalies:
+        one_series_evaluation_result[anomalies[0]] = 0
+
+    return one_series_evaluation_result
\ No newline at end of file
diff --git a/ats/tests/test_evaluators.py b/ats/tests/test_evaluators.py
index c0cc581..cba8342 100644
--- a/ats/tests/test_evaluators.py
+++ b/ats/tests/test_evaluators.py
@@ -6,6 +6,9 @@
 from ..evaluators import _format_for_anomaly_detector
 from ..evaluators import _calculate_model_scores
 from ..evaluators import Evaluator
+from ..evaluators import _variable_granularity_evaluation
+from ..evaluators import _point_granularity_evaluation
+from ..evaluators import _series_granularity_evaluation
 import unittest
 import pandas as pd
 import random as rnd
@@ -239,31 +242,98 @@ def test_get_model_output(self):
     def test_calculate_model_scores(self):
         single_model_evaluation = {
             'sample_1': {
-                'anomaly_1': True,
-                'anomaly_2': False,
-                'false_positives': 2
+                'anomaly_1': 0.5,
+                'anomaly_2': 0.2,
+                'false_positives': 0.6
+            },
+            'sample_2': {
+                'anomaly_1': 0.3,
+                'anomaly_2': 0.4,
+                'anomaly_3': 0.1,
+                'false_positives': 0.2
+            },
+        }
+        model_scores = _calculate_model_scores(single_model_evaluation,granularity='data_point')
+        # model_scores:
+        # { 'anomaly_3': 0.1,
+        #   'anomaly_1': 0.8,
+        #   'false_positives': 0.8,
+        #   'anomaly_2': 0.6
+        # }
+        self.assertEqual(len(model_scores),4)
+        self.assertIsInstance(model_scores,dict)
+        self.assertIn('anomaly_1',model_scores.keys())
+        self.assertIn('anomaly_2',model_scores.keys())
+        self.assertIn('anomaly_3',model_scores.keys())
+        self.assertIn('false_positives',model_scores.keys())
+        self.assertAlmostEqual(model_scores['anomaly_1'],0.8)
+        self.assertAlmostEqual(model_scores['anomaly_2'],0.6)
+        self.assertAlmostEqual(model_scores['anomaly_3'],0.1)
+        self.assertAlmostEqual(model_scores['false_positives'],0.8)
+
+    def test_calculate_model_score_series_granularity(self):
+        single_model_evaluation = {
+            'sample_1': {
+                'anomaly_1': 1,
             },
             'sample_2': {
-                'anomaly_1': True,
-                'anomaly_2': True,
                 'false_positives': 1
             },
+            'sample_3': {
+                'anomaly_2': 1
+            }
         }
-        model_scores = _calculate_model_scores(single_model_evaluation)
+        model_scores = _calculate_model_scores(single_model_evaluation,granularity='series')
+        # model_scores:
+        # { 'anomaly_1': 0.3333333333333333,
+        #   'false_positives': 0.3333333333333333,
+        #   'anomaly_2': 0.3333333333333333
+        # }
         self.assertEqual(len(model_scores),3)
         self.assertIsInstance(model_scores,dict)
         self.assertIn('anomaly_1',model_scores.keys())
         self.assertIn('anomaly_2',model_scores.keys())
         self.assertIn('false_positives',model_scores.keys())
-        self.assertAlmostEqual(model_scores['anomaly_1'],1.0)
-        self.assertAlmostEqual(model_scores['anomaly_2'],0.5)
-        self.assertAlmostEqual(model_scores['false_positives'],3)
+        self.assertAlmostEqual(model_scores['anomaly_1'],0.3333333333333333)
+        self.assertAlmostEqual(model_scores['anomaly_2'],0.3333333333333333)
+        self.assertAlmostEqual(model_scores['false_positives'],0.333333333333333)
+
+    def test_evaluate_point_granularity(self):
+        anomalies = ['step_uv']
+        effects = []
+        # series with 2880 data points
+        series_generator = HumiTempTimeseriesGenerator()
+        series1 = series_generator.generate(anomalies=anomalies,effects=effects)
+        series2 = series_generator.generate(anomalies=anomalies,effects=effects)
+        dataset = [series1,series2]
+        evaluator = Evaluator(test_data=dataset)
+        minmax1 = MinMaxAnomalyDetector()
+        minmax2 = MinMaxAnomalyDetector()
+        minmax3 = MinMaxAnomalyDetector()
+        models={'detector_1': minmax1,
+                'detector_2': minmax2,
+                'detector_3': minmax3
+                }
+        evaluation_results = evaluator.evaluate(models=models,granularity='data_point')
+        # Evaluation_results:
+        # detector_1: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
+        # detector_2: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
+        # detector_3: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
+        self.assertIsInstance(evaluation_results,dict)
+        self.assertEqual(len(evaluation_results),3)
+        self.assertEqual(len(evaluation_results['detector_1']),2)
+        self.assertEqual(len(evaluation_results['detector_2']),2)
+        self.assertEqual(len(evaluation_results['detector_3']),2)
+        self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.000694444444444444)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.000694444444444444)

-    def test_evaluate(self):
-        anomalies = ['spike_uv','step_uv']
+    def test_evaluate_variable_granularity(self):
+        anomalies = ['step_uv']
+        effects = []
+        # series with 2880 data points
         series_generator = HumiTempTimeseriesGenerator()
-        series1 = series_generator.generate(anomalies=anomalies)
-        series2 = series_generator.generate(anomalies=anomalies)
+        series1 = series_generator.generate(anomalies=anomalies,effects=effects)
+        series2 = series_generator.generate(anomalies=anomalies,effects=effects)
         dataset = [series1,series2]
         evaluator = Evaluator(test_data=dataset)
         minmax1 = MinMaxAnomalyDetector()
@@ -273,17 +343,92 @@
                 'detector_2': minmax2,
                 'detector_3': minmax3
                 }
-        evaluation_results = evaluator.evaluate(models=models)
+        evaluation_results = evaluator.evaluate(models=models,granularity='variable')
         # Evaluation_results:
-        # detector_1: {'step_uv': 1.0, 'spike_uv': 0.0, 'false_positives': 4}
-        # detector_2: {'step_uv': 1.0, 'spike_uv': 0.0, 'false_positives': 4}
-        # detector_3: {'step_uv': 1.0, 'spike_uv': 0.0, 'false_positives': 4}
+        # detector_1: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
+        # detector_2: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
+        # detector_3: {'step_uv': 0.000694444444444444, 'false_positives': 0.000694444444444444}
+        self.assertIsInstance(evaluation_results,dict)
+        self.assertEqual(len(evaluation_results),3)
+        self.assertEqual(len(evaluation_results['detector_1']),2)
+        self.assertEqual(len(evaluation_results['detector_2']),2)
+        self.assertEqual(len(evaluation_results['detector_3']),2)
+        self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.000694444444444444)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.000694444444444444)
+
+    def test_evaluate_series_granularity(self):
+        anomalies = ['step_uv']
+        effects = []
+        series_generator = HumiTempTimeseriesGenerator()
+        # series1 will be a true anomaly for minmax
+        series1 = series_generator.generate(anomalies=anomalies,effects=effects)
+        # series2 will be a false positive for minmax (it always sees 2 anomalous data points for each variable)
+        series2 = series_generator.generate(anomalies=[],effects=effects)
+        dataset = [series1,series2]
+        evaluator = Evaluator(test_data=dataset)
+        minmax1 = MinMaxAnomalyDetector()
+        minmax2 = MinMaxAnomalyDetector()
+        minmax3 = MinMaxAnomalyDetector()
+        models={'detector_1': minmax1,
+                'detector_2': minmax2,
+                'detector_3': minmax3
+                }
+        evaluation_results = evaluator.evaluate(models=models,granularity='series')
+        # Evaluation_results:
+        # detector_1: {'step_uv': 0.5, 'false_positives': 0.5}
+        # detector_2: {'step_uv': 0.5, 'false_positives': 0.5}
+        # detector_3: {'step_uv': 0.5, 'false_positives': 0.5}
+
+        self.assertIsInstance(evaluation_results,dict)
+        self.assertEqual(len(evaluation_results),3)
+        self.assertEqual(len(evaluation_results['detector_1']),2)
+        self.assertEqual(len(evaluation_results['detector_2']),2)
+        self.assertEqual(len(evaluation_results['detector_3']),2)
+        self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.5)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.5)
+
+    def test_series_granularity_eval_with_non_detected_anomalies(self):
+        effects = []
+        series_generator = HumiTempTimeseriesGenerator()
+        # series1 will be a true anomaly for minmax
+        series1 = series_generator.generate(anomalies=['step_uv'],effects=effects)
+        # series2 contains a pattern_uv anomaly, but minmax flags the series anyway (it always sees 2 anomalous data points for each variable), so it counts towards pattern_uv rather than false_positives
+        series2 = series_generator.generate(anomalies=['pattern_uv'],effects=effects)
+        dataset = [series1,series2]
+        evaluator = Evaluator(test_data=dataset)
+        minmax1 = MinMaxAnomalyDetector()
+        minmax2 = MinMaxAnomalyDetector()
+        minmax3 = MinMaxAnomalyDetector()
+        models={'detector_1': minmax1,
+                'detector_2': minmax2,
+                'detector_3': minmax3
+                }
+        evaluation_results = evaluator.evaluate(models=models,granularity='series')
+        # Evaluation_results:
+        # detector_1: {'step_uv': 0.5, 'pattern_uv': 0.5, 'false_positives': 0.0}
+        # detector_2: {'step_uv': 0.5, 'pattern_uv': 0.5, 'false_positives': 0.0}
+        # detector_3: {'step_uv': 0.5, 'pattern_uv': 0.5, 'false_positives': 0.0}

         self.assertIsInstance(evaluation_results,dict)
         self.assertEqual(len(evaluation_results),3)
         self.assertEqual(len(evaluation_results['detector_1']),3)
         self.assertEqual(len(evaluation_results['detector_2']),3)
         self.assertEqual(len(evaluation_results['detector_3']),3)
+        self.assertAlmostEqual(evaluation_results['detector_1']['step_uv'],0.5)
+        self.assertAlmostEqual(evaluation_results['detector_1']['pattern_uv'],0.5)
+        self.assertAlmostEqual(evaluation_results['detector_1']['false_positives'],0.0)
+
+    def test_raised_error_evaluation_series_granularity(self):
+        anomalies = ['step_uv','spike_uv']
+        series_generator = HumiTempTimeseriesGenerator()
+        series = series_generator.generate(anomalies=anomalies)
+        dataset = [series]
+        minmax = MinMaxAnomalyDetector()
+        evaluator = Evaluator(test_data=dataset)
+        try:
+            evaluation_result = evaluator.evaluate(models={'detector':minmax},granularity='series')
+        except Exception as e:
+            self.assertIsInstance(e,ValueError)

     def test_copy_dataset(self):
         series_generator = HumiTempTimeseriesGenerator()
@@ -300,3 +445,65 @@ def test_copy_dataset(self):
         self.assertEqual(len(dataset_copies[0]),2)
         self.assertIsInstance(dataset_copies[1],list)
         self.assertEqual(len(dataset_copies[1]),2)
+
+    def test_variable_granularity_evaluation(self):
+        series_generator = HumiTempTimeseriesGenerator()
+        series = series_generator.generate(anomalies=['step_uv'])
+        minmax = MinMaxAnomalyDetector()
+        formatted_series,anomaly_labels = _format_for_anomaly_detector(series,synthetic=True)
+        flagged_series = minmax.apply(formatted_series)
+        evaluation_result = _variable_granularity_evaluation(flagged_series,anomaly_labels)
+        # evaluation_result:
+        # { 'step_uv': 0.00034722222222222224
+        #   'false_positives': 0.00034722222222222224
+        # }
+        self.assertEqual(len(evaluation_result),2)
+        self.assertAlmostEqual(evaluation_result['step_uv'],1/len(series))
+        self.assertAlmostEqual(evaluation_result['false_positives'],1/len(series))
+
+    def test_point_granularity_evaluation(self):
+        series_generator = HumiTempTimeseriesGenerator()
+        series = series_generator.generate(anomalies=['step_uv'])
+        minmax = MinMaxAnomalyDetector()
+        formatted_series,anomaly_labels = _format_for_anomaly_detector(series,synthetic=True)
+        flagged_series = minmax.apply(formatted_series)
+        evaluation_result = _point_granularity_evaluation(flagged_series,anomaly_labels)
+        # evaluation_result:
+        # { 'step_uv': 0.0006944444444444445
+        #   'false_positives': 0.0006944444444444445
+        # }
+        self.assertEqual(len(evaluation_result),2)
+        self.assertAlmostEqual(evaluation_result['step_uv'],2/len(series))
+        self.assertAlmostEqual(evaluation_result['false_positives'],2/len(series))
+
+    def test_series_granularity_evaluation(self):
+        series_generator = HumiTempTimeseriesGenerator()
+        series = series_generator.generate(anomalies=['step_uv'])
+        minmax = MinMaxAnomalyDetector()
+        formatted_series,anomaly_labels = _format_for_anomaly_detector(series,synthetic=True)
+        flagged_series = minmax.apply(formatted_series)
+        evaluation_result = _series_granularity_evaluation(flagged_series,anomaly_labels)
+        # evaluation_result:
+        # { 'step_uv': 1
+        # }
+        self.assertEqual(len(evaluation_result),1)
+        self.assertAlmostEqual(evaluation_result['step_uv'],1)
+
+        series1 = series_generator.generate(anomalies=[])
+        minmax1 = MinMaxAnomalyDetector()
+        formatted_series1,anomaly_labels1 = _format_for_anomaly_detector(series1,synthetic=True)
+        flagged_series1 = minmax.apply(formatted_series1)
+        evaluation_result1 = _series_granularity_evaluation(flagged_series1,anomaly_labels1)
+        self.assertEqual(len(evaluation_result1),1)
+        self.assertAlmostEqual(evaluation_result1['false_positives'],1)
+        # evaluation_result1:
+        # { 'false_positives': 1
+        # }
+
+        try:
+            series2 = series_generator.generate(anomalies=['spike_uv','step_uv'])
+            formatted_series2,anomaly_labels2 = _format_for_anomaly_detector(series2,synthetic=True)
+            flagged_series2 = minmax.apply(formatted_series2)
+            evaluation_result2 = _series_granularity_evaluation(flagged_series2,anomaly_labels2)
+        except Exception as e:
+            self.assertIsInstance(e,ValueError)
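
For reviewers, a minimal usage sketch of the new granularity option, pieced together from the tests above. The import paths are assumptions (the test suite only uses relative imports), and the expected output shape is taken from the series-granularity test; treat it as an illustration, not part of the patch.

# Hypothetical import paths -- adjust to wherever the package actually exposes these names.
from ats.evaluators import Evaluator
from ats.generators import HumiTempTimeseriesGenerator   # assumed module for the test series generator
from ats.models import MinMaxAnomalyDetector              # assumed module for the min/max detector

generator = HumiTempTimeseriesGenerator()
dataset = [
    generator.generate(anomalies=['step_uv']),   # series with one injected step anomaly
    generator.generate(anomalies=[]),            # clean series: any flag counts as a false positive
]

evaluator = Evaluator(test_data=dataset)
models = {'minmax': MinMaxAnomalyDetector()}

# granularity can be 'data_point' (the default), 'variable' or 'series'
scores = evaluator.evaluate(models=models, granularity='series')
# Per the tests above, this returns per-model average scores, e.g.:
# {'minmax': {'step_uv': 0.5, 'false_positives': 0.5}}
print(scores)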