Add a `unique_count` aggregation function and a test for it.

`unique_count` returns `grouped_df.nunique()`. The new test feeds the values
1, 2, 2 for one entity and expects an aggregated value of 2. Both touched
files also lose one blank line after their module docstring, and the test
module imports `unique_count`.

NOTE(review): this patch was rebuilt from a copy whose line breaks had been
collapsed. The number of lines in every hunk matches its `@@` header, but the
wrap point of the first module docstring and the indentation inside the
triple-quoted CSV fixtures are assumptions - confirm against the repository
before applying.

diff --git a/src/timeseriesflattener/v1/aggregation_fns.py b/src/timeseriesflattener/v1/aggregation_fns.py
index 3b460926..b582036d 100644
--- a/src/timeseriesflattener/v1/aggregation_fns.py
+++ b/src/timeseriesflattener/v1/aggregation_fns.py
@@ -1,7 +1,6 @@
 """Functions for resolving multiple values in a time-series into a single
 value."""
 
-
 from typing import Callable
 
 import catalogue
@@ -59,6 +58,10 @@ def count(grouped_df: DataFrameGroupBy) -> DataFrame:
     return grouped_df.count()
 
 
+def unique_count(grouped_df: DataFrameGroupBy) -> DataFrame:
+    return grouped_df.nunique()
+
+
 def variance(grouped_df: DataFrameGroupBy) -> DataFrame:
     return grouped_df.var()
 
diff --git a/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py b/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py
index e76d980a..b05ec819 100644
--- a/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py
+++ b/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py
@@ -1,12 +1,12 @@
 """Tests of aggregation strategies."""
 
-
 import numpy as np
 
 from timeseriesflattener.v1.aggregation_fns import (
     boolean,
     change_per_day,
     count,
+    unique_count,
     earliest,
     latest,
     maximum,
@@ -237,6 +237,29 @@ def test_aggregation_count():
     )
 
 
+def test_aggregation_unique_count():
+    prediction_times_str = """entity_id,timestamp,
+                            1,2021-12-31 00:00:00
+                            """
+    predictor_df_str = """entity_id,timestamp,value,
+                        1,2021-12-30 00:00:01, 1
+                        1,2021-12-30 00:00:02, 2,
+                        1,2021-12-30 00:00:03, 2,
+                        """
+
+    assert_flattened_data_as_expected(
+        prediction_times_df=prediction_times_str,
+        output_spec=PredictorSpec(
+            feature_base_name="value",
+            timeseries_df=str_to_df(predictor_df_str),
+            aggregation_fn=unique_count,
+            lookbehind_days=2,
+            fallback=0,
+        ),
+        expected_values=[2],
+    )
+
+
 def test_aggregation_bool():
     prediction_times_str = """entity_id,timestamp,
                             1,2021-12-31 00:00:00