From 4992d87e8318193f2d831802408d061cbdaf4d6c Mon Sep 17 00:00:00 2001
From: bokajgd <bokajgd@gmail.com>
Date: Wed, 4 Oct 2023 13:51:57 +0200
Subject: [PATCH] test: add test for outcome timestamps

---
 .../test_flattened_dataset/test_add_values.py | 58 ++++++++++++++++++-
 1 file changed, 56 insertions(+), 2 deletions(-)

diff --git a/src/timeseriesflattener/tests/test_timeseriesflattener/test_flattened_dataset/test_add_values.py b/src/timeseriesflattener/tests/test_timeseriesflattener/test_flattened_dataset/test_add_values.py
index 5c467e0f..68fae828 100644
--- a/src/timeseriesflattener/tests/test_timeseriesflattener/test_flattened_dataset/test_add_values.py
+++ b/src/timeseriesflattener/tests/test_timeseriesflattener/test_flattened_dataset/test_add_values.py
@@ -1,12 +1,13 @@
 """Tests for adding values to a flattened dataset."""
 
+import datetime as dt
 
 import numpy as np
 import pandas as pd
 import pytest
 
 from timeseriesflattener import TimeseriesFlattener
-from timeseriesflattener.aggregation_fns import maximum, minimum
+from timeseriesflattener.aggregation_fns import latest, maximum, minimum
 from timeseriesflattener.feature_specs.single_specs import (
     OutcomeSpec,
     PredictorSpec,
@@ -499,7 +500,6 @@ def test_add_temporal_predictors_then_temporal_outcome():
             check_dtype=False,
         )
 
-
 def test_add_temporal_incident_binary_outcome():
     prediction_times_str = """entity_id,timestamp,
                             1,2021-11-05 00:00:00
@@ -548,3 +548,57 @@ def test_add_temporal_incident_binary_outcome():
                 df[col] = df[col].astype("int32")
 
         pd.testing.assert_series_equal(outcome_df[col], expected_df[col])
+
+
+def test_add_outcome_timestamps():
+    """Check that `latest` yields the newest event timestamp per 10-day lookahead."""
+    prediction_times_str = """entity_id,timestamp,
+                            1,2021-11-05 00:00:00
+                            1,2021-11-01 00:00:00
+                            1,2023-11-05 00:00:00
+                            """
+
+    # The value column carries the event timestamp itself.
+    event_times_str = """entity_id,timestamp,value,
+                        1,2021-11-06 00:00:01,2021-11-06 00:00:01
+                        1,2021-11-13 00:00:01,2021-11-13 00:00:01
+                        """
+
+    outcome_col = "outc_timestamp_within_10_days_latest_fallback_nan_dichotomous"
+    # Expected per prediction time, in input order: two events ahead, one, none.
+    expected_df_str = """entity_id,outc_timestamp_within_10_days_latest_fallback_nan_dichotomous,
+    0,2021-11-13 00:00:01
+    1,2021-11-06 00:00:01
+    2,
+    """
+
+    prediction_times_df = str_to_df(prediction_times_str)
+    event_times_df = str_to_df(event_times_str)
+    expected_df = str_to_df(expected_df_str)
+    # Compare as strings; "NaT" (presumably the empty last row) is mapped back to NaN.
+    expected_df[outcome_col] = (
+        expected_df[outcome_col].astype(str).replace("NaT", np.nan)
+    )
+
+    flattened_dataset = TimeseriesFlattener(
+        prediction_times_df=prediction_times_df,
+        timestamp_col_name="timestamp",
+        entity_id_col_name="entity_id",
+        n_workers=4,
+        drop_pred_times_with_insufficient_look_distance=False,
+    )
+    flattened_dataset.add_spec(
+        spec=OutcomeSpec(
+            timeseries_df=event_times_df,
+            lookahead_days=10,
+            incident=False,
+            fallback=np.nan,
+            feature_base_name="timestamp",
+            aggregation_fn=latest,
+        ),
+    )
+    outcome_df = flattened_dataset.get_df()
+
+    # The expected column holds strings after `astype(str)` (never int64), so no
+    # int64 -> int32 platform coercion is needed before comparing.
+    pd.testing.assert_series_equal(outcome_df[outcome_col], expected_df[outcome_col])