Skip to content

Commit

Permalink
added time padding for segmentation
Browse files Browse the repository at this point in the history
  • Loading branch information
freemansw1 committed Oct 28, 2024
1 parent e39d736 commit beaf022
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 7 deletions.
23 changes: 16 additions & 7 deletions tobac/segmentation/watershed_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import numpy as np
import pandas as pd
from typing_extensions import Literal
from typing import Union, Callable
from typing import Union, Callable, Optional

import skimage
import numpy as np
Expand Down Expand Up @@ -417,12 +417,11 @@ def segmentation_timestep(
statistics: boolean, optional
Default is None. If True, bulk statistics for the data points assigned to each feature are saved in output.
Returns
-------
segmentation_out : iris.cube.Cube
segmentation_out : xarray.DataArray
Mask, 0 outside and integer numbers according to track
inside the ojects.
inside the objects.
features_out : pandas.DataFrame
Feature dataframe including the number of cells (2D or 3D) in
Expand Down Expand Up @@ -1135,6 +1134,7 @@ def segmentation(
segment_number_below_threshold: int = 0,
segment_number_unassigned: int = 0,
statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
time_padding: Optional[datetime.timedelta] = datetime.timedelta(seconds=0.5),
) -> tuple[xr.Dataset, pd.DataFrame]:
"""Use watershedding to determine region above a threshold
value around initial seeding position for all time steps of
Expand Down Expand Up @@ -1207,6 +1207,11 @@ def segmentation(
statistic : dict, optional
Default is None. Optional parameter to calculate bulk statistics within feature detection.
Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the values and the names of the metric the keys (e.g. {'mean': np.mean})
time_padding: timedelta, optional
If set, allows for segmentation to be associated with a feature input
timestep that is time_padding off of the feature. Extremely useful when
converting between micro- and nanoseconds, as is common when using Pandas
dataframes.
Returns
-------
Expand All @@ -1225,7 +1230,6 @@ def segmentation(
in coords.
"""
import pandas as pd
from iris.cube import CubeList

time_var_name: str = "time"
seg_out_type: str = "int64"
Expand Down Expand Up @@ -1264,8 +1268,13 @@ def segmentation(

for i_time, time_i in enumerate(field.coords[time_var_name]):
field_at_time = field.isel({time_var_name: i_time})
# TODO: allow more variability in time than exactly equal.
features_i = features.loc[all_times == time_i.values]
if time_padding is not None:
padded_conv = pd.Timedelta(time_padding).to_timedelta64()
min_time = time_i.values - padded_conv
max_time = time_i.values + padded_conv
features_i = features.loc[all_times.between(min_time, max_time)]
else:
features_i = features.loc[all_times == time_i.values]
segmentation_out_i, features_out_i = segmentation_timestep(
field_at_time,
features_i,
Expand Down
File renamed without changes.
104 changes: 104 additions & 0 deletions tobac/tests/segmentation_tests/test_segmentation_time_pad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Tests for time padding of segmentation
"""
import datetime
import pytest
from typing import Optional
import numpy as np
import tobac.testing as tb_test
import tobac.segmentation.watershed_segmentation as watershed_segmentation
import tobac.feature_detection as feature_detection


@pytest.mark.parametrize(
    "time_pad_setting, time_offset, expect_output,",
    [
        (datetime.timedelta(seconds=1), datetime.timedelta(seconds=0), True),
        (datetime.timedelta(seconds=1), datetime.timedelta(seconds=2), False),
        (datetime.timedelta(seconds=0), datetime.timedelta(seconds=2), False),
        (datetime.timedelta(seconds=3), datetime.timedelta(seconds=2), True),
        (datetime.timedelta(seconds=2), datetime.timedelta(seconds=1), True),
        (datetime.timedelta(seconds=0), datetime.timedelta(seconds=0), True),
        (None, datetime.timedelta(seconds=0), True),
        (None, datetime.timedelta(seconds=1), False),
    ],
)
def test_watershed_segmentation_time_pad(
    time_pad_setting: Optional[datetime.timedelta],
    time_offset: datetime.timedelta,
    expect_output: bool,
):
    """Check that ``segmentation`` honours its ``time_padding`` parameter.

    Two synthetic blobs are seeded at a single timestep, the detected
    features' times are shifted by ``time_offset``, and segmentation is run
    with ``time_padding=time_pad_setting``. Segmented cells are expected only
    when the offset falls inside the padding window (or matches exactly for
    ``time_padding=None``).
    """
    grid_shape = (50, 50)
    min_num_points = 2
    grid_spacing = 1

    # One spec per blob: (hdim_1 centre, hdim_2 centre, size per dim, amplitude).
    blob_specs = [
        (20.0, 20.0, 5, 2),
        (40.0, 40.0, 10, 10),
    ]
    # Expected number of segmented cells per feature when padding admits it.
    expected_ncells = [size * size for _, _, size, _ in blob_specs]

    raw_field = np.zeros(grid_shape)
    for h1_pt, h2_pt, blob_size, amplitude in blob_specs:
        raw_field = tb_test.make_feature_blob(
            raw_field,
            h1_pt,
            h2_pt,
            h1_size=blob_size,
            h2_size=blob_size,
            amplitude=amplitude,
        )

    field_xr = tb_test.make_dataset_from_arr(raw_field, data_type="xarray")
    field_xr = field_xr.assign_coords(time=datetime.datetime(2020, 1, 1))
    field_xr = field_xr.expand_dims("time")

    # Detect both features at the single timestep.
    detected = feature_detection.feature_detection_multithreshold(
        field_xr,
        i_time=0,
        dxy=1,
        threshold=[1, 2, 3],
        n_min_threshold=min_num_points,
        target="maximum",
        statistic={"features_mean": np.mean},
    )

    # Assign feature IDs, then shift feature times relative to the field time
    # so that time_padding has to bridge (or fail to bridge) the gap.
    detected["feature"] = [1, 2]
    detected.loc[:, "time"] += time_offset

    seg_mask, _ = watershed_segmentation.segmentation(
        field=field_xr,
        features=detected,
        dxy=grid_spacing,
        threshold=1.5,
        time_padding=time_pad_setting,
    )

    for feature_id, ncells in zip((1, 2), expected_ncells):
        if expect_output:
            # Cell counts in the mask must match each feature's blob footprint.
            assert np.sum(seg_mask == feature_id) == ncells
        else:
            # Offset outside the padding window: nothing may be segmented.
            assert np.sum(seg_mask == feature_id) == 0

0 comments on commit beaf022

Please sign in to comment.