Skip to content

Commit

Permalink
added time padding for segmentation
Browse files Browse the repository at this point in the history
  • Loading branch information
freemansw1 committed Oct 28, 2024
1 parent e39d736 commit beaf022
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 7 deletions.
23 changes: 16 additions & 7 deletions tobac/segmentation/watershed_segmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
import numpy as np
import pandas as pd
from typing_extensions import Literal
from typing import Union, Callable
from typing import Union, Callable, Optional

import skimage
import numpy as np
Expand Down Expand Up @@ -417,12 +417,11 @@ def segmentation_timestep(
statistics: boolean, optional
Default is None. If True, bulk statistics for the data points assigned to each feature are saved in output.
Returns
-------
segmentation_out : iris.cube.Cube
segmentation_out : xarray.DataArray
Mask, 0 outside and integer numbers according to track
inside the ojects.
inside the objects.
features_out : pandas.DataFrame
Feature dataframe including the number of cells (2D or 3D) in
Expand Down Expand Up @@ -1135,6 +1134,7 @@ def segmentation(
segment_number_below_threshold: int = 0,
segment_number_unassigned: int = 0,
statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
time_padding: Optional[datetime.timedelta] = datetime.timedelta(seconds=0.5),
) -> tuple[xr.Dataset, pd.DataFrame]:
"""Use watershedding to determine region above a threshold
value around initial seeding position for all time steps of
Expand Down Expand Up @@ -1207,6 +1207,11 @@ def segmentation(
statistic : dict, optional
Default is None. Optional parameter to calculate bulk statistics within feature detection.
Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the values and the names of the metric the keys (e.g. {'mean': np.mean})
time_padding: timedelta, optional
If set, allows for segmentation to be associated with a feature input
timestep that is time_padding off of the feature. Extremely useful when
converting between micro- and nanoseconds, as is common when using Pandas
dataframes.
Returns
-------
Expand All @@ -1225,7 +1230,6 @@ def segmentation(
in coords.
"""
import pandas as pd
from iris.cube import CubeList

time_var_name: str = "time"
seg_out_type: str = "int64"
Expand Down Expand Up @@ -1264,8 +1268,13 @@ def segmentation(

for i_time, time_i in enumerate(field.coords[time_var_name]):
field_at_time = field.isel({time_var_name: i_time})
# TODO: allow more variability in time than exactly equal.
features_i = features.loc[all_times == time_i.values]
if time_padding is not None:
padded_conv = pd.Timedelta(time_padding).to_timedelta64()
min_time = time_i.values - padded_conv
max_time = time_i.values + padded_conv
features_i = features.loc[all_times.between(min_time, max_time)]
else:
features_i = features.loc[all_times == time_i.values]
segmentation_out_i, features_out_i = segmentation_timestep(
field_at_time,
features_i,
Expand Down
File renamed without changes.
104 changes: 104 additions & 0 deletions tobac/tests/segmentation_tests/test_segmentation_time_pad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
"""Tests for time padding of segmentation
"""
import datetime
import pytest
from typing import Optional
import numpy as np
import tobac.testing as tb_test
import tobac.segmentation.watershed_segmentation as watershed_segmentation
import tobac.feature_detection as feature_detection


@pytest.mark.parametrize(
    "time_pad_setting, time_offset, expect_output,",
    [
        (datetime.timedelta(seconds=1), datetime.timedelta(seconds=0), True),
        (datetime.timedelta(seconds=1), datetime.timedelta(seconds=2), False),
        (datetime.timedelta(seconds=0), datetime.timedelta(seconds=2), False),
        (datetime.timedelta(seconds=3), datetime.timedelta(seconds=2), True),
        (datetime.timedelta(seconds=2), datetime.timedelta(seconds=1), True),
        (datetime.timedelta(seconds=0), datetime.timedelta(seconds=0), True),
        (None, datetime.timedelta(seconds=0), True),
        (None, datetime.timedelta(seconds=1), False),
    ],
)
def test_watershed_segmentation_time_pad(
    time_pad_setting: Optional[datetime.timedelta],
    time_offset: datetime.timedelta,
    expect_output: bool,
):
    """Check that ``segmentation`` honours its ``time_padding`` parameter.

    Two synthetic blobs are seeded at a single timestep, the detected
    features' times are shifted by ``time_offset``, and segmentation is run
    with ``time_padding=time_pad_setting``. Segmented cells are expected only
    when the offset falls inside the padding window (or matches exactly for
    ``time_padding=None``).
    """
    grid_shape = (50, 50)
    min_num_points = 2
    grid_spacing = 1

    # One spec per blob: (hdim_1 centre, hdim_2 centre, size per dim, amplitude).
    blob_specs = [
        (20.0, 20.0, 5, 2),
        (40.0, 40.0, 10, 10),
    ]
    # Expected number of segmented cells per feature when padding admits it.
    expected_ncells = [size * size for _, _, size, _ in blob_specs]

    raw_field = np.zeros(grid_shape)
    for h1_pt, h2_pt, blob_size, amplitude in blob_specs:
        raw_field = tb_test.make_feature_blob(
            raw_field,
            h1_pt,
            h2_pt,
            h1_size=blob_size,
            h2_size=blob_size,
            amplitude=amplitude,
        )

    field_xr = tb_test.make_dataset_from_arr(raw_field, data_type="xarray")
    field_xr = field_xr.assign_coords(time=datetime.datetime(2020, 1, 1))
    field_xr = field_xr.expand_dims("time")

    # Detect both features at the single timestep.
    detected = feature_detection.feature_detection_multithreshold(
        field_xr,
        i_time=0,
        dxy=1,
        threshold=[1, 2, 3],
        n_min_threshold=min_num_points,
        target="maximum",
        statistic={"features_mean": np.mean},
    )

    # Assign feature IDs, then shift feature times relative to the field time
    # so that time_padding has to bridge (or fail to bridge) the gap.
    detected["feature"] = [1, 2]
    detected.loc[:, "time"] += time_offset

    seg_mask, _ = watershed_segmentation.segmentation(
        field=field_xr,
        features=detected,
        dxy=grid_spacing,
        threshold=1.5,
        time_padding=time_pad_setting,
    )

    for feature_id, ncells in zip((1, 2), expected_ncells):
        if expect_output:
            # Cell counts in the mask must match each feature's blob footprint.
            assert np.sum(seg_mask == feature_id) == ncells
        else:
            # Offset outside the padding window: nothing may be segmented.
            assert np.sum(seg_mask == feature_id) == 0

0 comments on commit beaf022

Please sign in to comment.