From beaf022d359790420dabd8ae3264e90e03bf4305 Mon Sep 17 00:00:00 2001
From: Sean Freeman <freemansw1@gmail.com>
Date: Mon, 28 Oct 2024 09:06:48 -0500
Subject: [PATCH] added time padding for segmentation

---
 tobac/segmentation/watershed_segmentation.py  |  23 ++--
 .../test_segmentation.py                      |   0
 .../test_segmentation_time_pad.py             | 104 ++++++++++++++++++
 3 files changed, 120 insertions(+), 7 deletions(-)
 rename tobac/tests/{ => segmentation_tests}/test_segmentation.py (100%)
 create mode 100644 tobac/tests/segmentation_tests/test_segmentation_time_pad.py

diff --git a/tobac/segmentation/watershed_segmentation.py b/tobac/segmentation/watershed_segmentation.py
index b2fa7a54..905fc736 100644
--- a/tobac/segmentation/watershed_segmentation.py
+++ b/tobac/segmentation/watershed_segmentation.py
@@ -39,7 +39,7 @@
 import numpy as np
 import pandas as pd
 from typing_extensions import Literal
-from typing import Union, Callable
+from typing import Union, Callable, Optional
 
 import skimage
 import numpy as np
@@ -417,12 +417,11 @@ def segmentation_timestep(
     statistics: boolean, optional
         Default is None. If True, bulk statistics for the data points assigned to each feature are saved in output.
 
-
     Returns
     -------
-    segmentation_out : iris.cube.Cube
+    segmentation_out : xarray.DataArray
         Mask, 0 outside and integer numbers according to track
-        inside the ojects.
+        inside the objects.
 
     features_out : pandas.DataFrame
         Feature dataframe including the number of cells (2D or 3D) in
@@ -1135,6 +1134,7 @@ def segmentation(
     segment_number_below_threshold: int = 0,
     segment_number_unassigned: int = 0,
     statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
+    time_padding: Optional[datetime.timedelta] = datetime.timedelta(seconds=0.5),
 ) -> tuple[xr.Dataset, pd.DataFrame]:
     """Use watershedding to determine region above a threshold
     value around initial seeding position for all time steps of
@@ -1207,6 +1207,11 @@ def segmentation(
     statistic : dict, optional
         Default is None. Optional parameter to calculate bulk statistics within feature detection.
         Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the values and the names of the metric the keys (e.g. {'mean': np.mean})
+    time_padding: timedelta, optional
+        If set, allows for segmentation to be associated with a feature input
+        timestep that is time_padding off of the feature. Extremely useful when
+        converting between micro- and nanoseconds, as is common when using Pandas
+        dataframes.
 
     Returns
     -------
@@ -1225,7 +1230,6 @@ def segmentation(
         in coords.
     """
     import pandas as pd
-    from iris.cube import CubeList
 
     time_var_name: str = "time"
     seg_out_type: str = "int64"
@@ -1264,8 +1268,13 @@ def segmentation(
 
     for i_time, time_i in enumerate(field.coords[time_var_name]):
         field_at_time = field.isel({time_var_name: i_time})
-        # TODO: allow more variability in time than exactly equal.
-        features_i = features.loc[all_times == time_i.values]
+        if time_padding is not None:
+            padded_conv = pd.Timedelta(time_padding).to_timedelta64()
+            min_time = time_i.values - padded_conv
+            max_time = time_i.values + padded_conv
+            features_i = features.loc[all_times.between(min_time, max_time)]
+        else:
+            features_i = features.loc[all_times == time_i.values]
         segmentation_out_i, features_out_i = segmentation_timestep(
             field_at_time,
             features_i,
diff --git a/tobac/tests/test_segmentation.py b/tobac/tests/segmentation_tests/test_segmentation.py
similarity index 100%
rename from tobac/tests/test_segmentation.py
rename to tobac/tests/segmentation_tests/test_segmentation.py
diff --git a/tobac/tests/segmentation_tests/test_segmentation_time_pad.py b/tobac/tests/segmentation_tests/test_segmentation_time_pad.py
new file mode 100644
index 00000000..79409f10
--- /dev/null
+++ b/tobac/tests/segmentation_tests/test_segmentation_time_pad.py
@@ -0,0 +1,104 @@
+"""Tests for time padding of segmentation
+"""
+import datetime
+import pytest
+from typing import Optional
+import numpy as np
+import tobac.testing as tb_test
+import tobac.segmentation.watershed_segmentation as watershed_segmentation
+import tobac.feature_detection as feature_detection
+
+
+@pytest.mark.parametrize(
+    "time_pad_setting, time_offset, expect_output,",
+    [
+        (datetime.timedelta(seconds=1), datetime.timedelta(seconds=0), True),
+        (datetime.timedelta(seconds=1), datetime.timedelta(seconds=2), False),
+        (datetime.timedelta(seconds=0), datetime.timedelta(seconds=2), False),
+        (datetime.timedelta(seconds=3), datetime.timedelta(seconds=2), True),
+        (datetime.timedelta(seconds=2), datetime.timedelta(seconds=1), True),
+        (datetime.timedelta(seconds=0), datetime.timedelta(seconds=0), True),
+        (None, datetime.timedelta(seconds=0), True),
+        (None, datetime.timedelta(seconds=1), False),
+    ],
+)
+def test_watershed_segmentation_time_pad(
+    time_pad_setting: Optional[datetime.timedelta],
+    time_offset: datetime.timedelta,
+    expect_output: bool,
+):
+    """Tests tobac.watershed_segmentation for time padding working correctly."""
+    test_dset_size = (50, 50)
+    test_hdim_1_pt = 20.0
+    test_hdim_2_pt = 20.0
+    test_hdim_1_sz = 5
+    test_hdim_2_sz = 5
+    size_feature1 = test_hdim_1_sz * test_hdim_2_sz
+    test_amp = 2
+    test_min_num = 2
+
+    test_data = np.zeros(test_dset_size)
+    test_data = tb_test.make_feature_blob(
+        test_data,
+        test_hdim_1_pt,
+        test_hdim_2_pt,
+        h1_size=test_hdim_1_sz,
+        h2_size=test_hdim_2_sz,
+        amplitude=test_amp,
+    )
+
+    # add feature of different size
+    test_hdim_1_pt = 40.0
+    test_hdim_2_pt = 40.0
+    test_hdim_1_sz = 10
+    test_hdim_2_sz = 10
+    size_feature2 = test_hdim_1_sz * test_hdim_2_sz
+    test_amp = 10
+    test_dxy = 1
+
+    test_data = tb_test.make_feature_blob(
+        test_data,
+        test_hdim_1_pt,
+        test_hdim_2_pt,
+        h1_size=test_hdim_1_sz,
+        h2_size=test_hdim_2_sz,
+        amplitude=test_amp,
+    )
+
+    test_data_xarray = tb_test.make_dataset_from_arr(test_data, data_type="xarray")
+    test_data_xarray = test_data_xarray.assign_coords(
+        time=datetime.datetime(2020, 1, 1)
+    )
+
+    test_data_xarray = test_data_xarray.expand_dims("time")
+    # detect both features
+    fd_output = feature_detection.feature_detection_multithreshold(
+        test_data_xarray,
+        i_time=0,
+        dxy=1,
+        threshold=[1, 2, 3],
+        n_min_threshold=test_min_num,
+        target="maximum",
+        statistic={"features_mean": np.mean},
+    )
+
+    # add feature IDs to data frame for one time step
+    fd_output["feature"] = [1, 2]
+    fd_output.loc[:, "time"] += time_offset
+
+    # perform segmentation
+    out_seg_mask, out_df = watershed_segmentation.segmentation(
+        field=test_data_xarray,
+        features=fd_output,
+        dxy=test_dxy,
+        threshold=1.5,
+        time_padding=time_pad_setting,
+    )
+    out_seg_mask_arr = out_seg_mask
+    if expect_output:
+        # assure that the number of grid cells belonging to each feature (ncells) are consistent with segmentation mask
+        assert np.sum(out_seg_mask_arr == 1) == size_feature1
+        assert np.sum(out_seg_mask_arr == 2) == size_feature2
+    else:
+        assert np.sum(out_seg_mask_arr == 1) == 0
+        assert np.sum(out_seg_mask_arr == 2) == 0