From 8dcde63ec2ed6285b7324e2863f861e81a6170ec Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Gr=C3=B8hn?= Date: Tue, 26 Nov 2024 13:13:23 +0100 Subject: [PATCH 1/2] fix: add unique_count agg to v1 --- src/timeseriesflattener/v1/aggregation_fns.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/timeseriesflattener/v1/aggregation_fns.py b/src/timeseriesflattener/v1/aggregation_fns.py index 3b460926..b582036d 100644 --- a/src/timeseriesflattener/v1/aggregation_fns.py +++ b/src/timeseriesflattener/v1/aggregation_fns.py @@ -1,7 +1,6 @@ """Functions for resolving multiple values in a time-series into a single value.""" - from typing import Callable import catalogue @@ -59,6 +58,10 @@ def count(grouped_df: DataFrameGroupBy) -> DataFrame: return grouped_df.count() +def unique_count(grouped_df: DataFrameGroupBy) -> DataFrame: + return grouped_df.nunique() + + def variance(grouped_df: DataFrameGroupBy) -> DataFrame: return grouped_df.var() From 469d777029c3899191537bcb342d311cd1a487a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakob=20Gr=C3=B8hn?= Date: Tue, 26 Nov 2024 13:13:34 +0100 Subject: [PATCH 2/2] test: add v1 test for unique count --- .../test_aggregation_fns.py | 25 ++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py b/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py index e76d980a..b05ec819 100644 --- a/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py +++ b/src/timeseriesflattener/v1/tests/test_timeseriesflattener/test_aggregation_fns.py @@ -1,12 +1,12 @@ """Tests of aggregation strategies.""" - import numpy as np from timeseriesflattener.v1.aggregation_fns import ( boolean, change_per_day, count, + unique_count, earliest, latest, maximum, @@ -237,6 +237,29 @@ def test_aggregation_count(): ) +def test_aggregation_unique_count(): + prediction_times_str = """entity_id,timestamp, + 1,2021-12-31 00:00:00 + """ + predictor_df_str = """entity_id,timestamp,value, + 1,2021-12-30 00:00:01, 1 + 1,2021-12-30 00:00:02, 2, + 1,2021-12-30 00:00:03, 2, + """ + + assert_flattened_data_as_expected( + prediction_times_df=prediction_times_str, + output_spec=PredictorSpec( + feature_base_name="value", + timeseries_df=str_to_df(predictor_df_str), + aggregation_fn=unique_count, + lookbehind_days=2, + fallback=0, + ), + expected_values=[2], + ) + + def test_aggregation_bool(): prediction_times_str = """entity_id,timestamp, 1,2021-12-31 00:00:00