Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
146 changes: 0 additions & 146 deletions Examples - anomaly_detectors-Copy1.ipynb

This file was deleted.

3 changes: 2 additions & 1 deletion ats/anomaly_detectors/naive/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,2 @@
from .minmax import MinMaxAnomalyDetector
from .minmax import MinMaxAnomalyDetector
from .zscore import ZScoreAnomalyDetector
67 changes: 67 additions & 0 deletions ats/anomaly_detectors/naive/zscore.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
# -*- coding: utf-8 -*-
"""Anomaly detectors"""

import pandas as pd

from ..base import AnomalyDetector

# Setup logging
import logging
logger = logging.getLogger(__name__)


class ZScoreAnomalyDetector(AnomalyDetector):
    """Naive anomaly detector based on per-column z-scores.

    For every column of the input DataFrame, the row with the highest
    z-score and the row with the lowest z-score are flagged with ``1`` in a
    new ``<column>_anomaly`` column; every other row gets ``0``.
    """

    capabilities = {
        'mode': 'unsupervised',
        'streaming': False,
        'context': 'series',
        'granularity': 'variable',
        'multivariate': True,
        'scope': 'specific'
    }

    @AnomalyDetector.apply_method
    def apply(self, data, inplace=False):
        """Flag the extreme z-score rows of each column.

        Args:
            data: a Pandas DataFrame holding a single time series, one
                column per variable.
            inplace: when False (the default) the input is copied and left
                untouched; when True the anomaly columns are added directly
                to ``data``.

        Returns:
            The DataFrame (a copy unless ``inplace`` is True) with one extra
            ``<column>_anomaly`` column of 0/1 flags per original column.

        Raises:
            NotImplementedError: if ``data`` is not a Pandas DataFrame.
        """

        logger.info(f'Applying ZScoreAnomalyDetector with inplace={inplace}')

        if not isinstance(data, pd.DataFrame):
            raise NotImplementedError('This anomaly detector can work only on a single time series (as a Pandas DataFrames)')

        timeseries_df = data

        if not inplace:
            timeseries_df = timeseries_df.copy()

        # Snapshot the column labels: the loop adds "<col>_anomaly" columns
        # to the very same DataFrame and those must not be scored themselves.
        for col in list(timeseries_df.columns):
            anomaly_col = f"{col}_anomaly"
            anomaly_flags = pd.Series(0, index=timeseries_df.index)

            mean = timeseries_df[col].mean()
            std = timeseries_df[col].std()
            z = (timeseries_df[col] - mean) / std

            # A constant column (std == 0 gives 0/0), a column too short for
            # a standard deviation, or an all-NaN/empty column yields no
            # usable z-score; idxmax()/idxmin() would fail on it, so the
            # column is left with all-zero flags.
            if z.dropna().empty:
                logger.warning(f"Column '{col}' has all NaN or constant values; skipping anomaly detection for this column.")
                timeseries_df[anomaly_col] = anomaly_flags
                continue

            # idxmax()/idxmin() return index labels, so assign by label.
            anomaly_flags.loc[z.idxmax()] = 1
            anomaly_flags.loc[z.idxmin()] = 1

            timeseries_df[anomaly_col] = anomaly_flags

        return timeseries_df


19 changes: 18 additions & 1 deletion ats/tests/test_anomaly_detectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import numpy as np
import pandas as pd

from ..anomaly_detectors.naive import MinMaxAnomalyDetector
from ..anomaly_detectors.naive import MinMaxAnomalyDetector, ZScoreAnomalyDetector
from ..anomaly_detectors.ml.ifsom import IFSOMAnomalyDetector
from ..anomaly_detectors.stat.robust import _COMNHARAnomalyDetector
from ..utils import generate_timeseries_df, load_isp_format_wide_df, wide_df_to_list_of_timeseries_df, timeseries_df_to_list_of_timeseries_df
Expand Down Expand Up @@ -47,6 +47,23 @@ def test_minmax(self):
self.assertEqual(timeseries_df_scored.loc['2025-06-10 18:00:00+00:00', 'value_2_anomaly'], 1)
self.assertEqual(timeseries_df_scored.loc['2025-06-10 21:00:00+00:00', 'value_2_anomaly'], 1)

def test_zscore(self):
    """Check the flags ZScoreAnomalyDetector produces on a generated series."""

    detector = ZScoreAnomalyDetector()
    source_df = generate_timeseries_df(entries=10, variables=2)
    scored_df = detector.apply(source_df)

    # 10 entries; 2 generated variables plus one anomaly column for each.
    self.assertEqual(scored_df.shape, (10,4))

    # (timestamp, anomaly column, expected flag), checked in this order.
    expected_flags = [
        ('2025-06-10 14:00:00+00:00', 'value_1_anomaly', 0),
        ('2025-06-10 23:00:00+00:00', 'value_2_anomaly', 0),
        ('2025-06-10 19:00:00+00:00', 'value_1_anomaly', 1),
        # NOTE(review): this entry repeats the previous one; presumably a
        # second value_1 timestamp was intended here - confirm.
        ('2025-06-10 19:00:00+00:00', 'value_1_anomaly', 1),
        ('2025-06-10 18:00:00+00:00', 'value_2_anomaly', 1),
        ('2025-06-10 21:00:00+00:00', 'value_2_anomaly', 1),
    ]
    for timestamp, column, flag in expected_flags:
        self.assertEqual(scored_df.loc[timestamp, column], flag)


class TestStatAnomalyDetectors(unittest.TestCase):

Expand Down