Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix minimum distance filtering for varying vertical coordinates #452

Merged
merged 9 commits into from
Oct 17, 2024
167 changes: 87 additions & 80 deletions tobac/feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -1121,39 +1121,34 @@
elif i_threshold == 0:
regions_old = regions_i

if statistic:
# reconstruct the labeled regions based on the regions dict
labels = np.zeros(track_data.shape)
labels = labels.astype(int)
for key in regions_old.keys():
labels.ravel()[regions_old[key]] = key
# apply function to get statistics based on labeled regions and functions provided by the user
# the feature dataframe is updated by appending a column for each metric
if statistics_unsmoothed:
w-k-jones marked this conversation as resolved.
Show resolved Hide resolved
features_thresholds = get_statistics(
features_thresholds,
labels,
data_i.core_data(),
statistic=statistic,
index=np.unique(labels[labels > 0]),
id_column="idx",
)
else:
features_thresholds = get_statistics(
features_thresholds,
labels,
track_data,
statistic=statistic,
index=np.unique(labels[labels > 0]),
id_column="idx",
)

logging.debug(
"Finished feature detection for threshold "
+ str(i_threshold)
+ " : "
+ str(threshold_i)
)

if statistic:
# reconstruct the labeled regions based on the regions dict
labels = np.zeros(track_data.shape)
labels = labels.astype(int)
for key in regions_old.keys():
labels.ravel()[regions_old[key]] = key
# apply function to get statistics based on labeled regions and functions provided by the user
# the feature dataframe is updated by appending a column for each metric

# select which data to use according to statistics_unsmoothed option
stats_data = data_i.core_data() if statistics_unsmoothed else track_data

features_thresholds = get_statistics(
features_thresholds,
labels,
stats_data,
statistic=statistic,
index=np.unique(labels[labels > 0]),
id_column="idx",
)

return features_thresholds


Expand Down Expand Up @@ -1296,6 +1291,9 @@
if detect_subset is not None and ndim_time in detect_subset:
raise NotImplementedError("Cannot subset on time")

# Remember if dz is set and not vertical coord for min distance filtering
use_dz_for_filtering = dz is not None

if is_3D:
# We need to determine the time axis so that we can determine the
# vertical axis in each timestep if vertical_axis is not none.
Expand Down Expand Up @@ -1395,30 +1393,7 @@
statistic=statistic,
statistics_unsmoothed=statistics_unsmoothed,
)
# check if list of features is not empty, then merge features from different threshold
# values into one DataFrame and append to list for individual timesteps:
if not features_thresholds.empty:
hdim1_ax, hdim2_ax = internal_utils.find_hdim_axes_3D(
field_in, vertical_coord=vertical_coord
)
hdim1_max = field_in.shape[hdim1_ax] - 1
hdim2_max = field_in.shape[hdim2_ax] - 1
# Loop over DataFrame to remove features that are closer than distance_min to each
# other:
if min_distance > 0:
features_thresholds = filter_min_distance(
features_thresholds,
dxy=dxy,
dz=dz,
min_distance=min_distance,
z_coordinate_name=vertical_coord,
target=target,
PBC_flag=PBC_flag,
min_h1=0,
max_h1=hdim1_max,
min_h2=0,
max_h2=hdim2_max,
)

list_features_timesteps.append(features_thresholds)

logging.debug(
Expand All @@ -1440,9 +1415,41 @@
)
else:
features = add_coordinates(features, field_in)

# Loop over DataFrame to remove features that are closer than distance_min to each
# other:
filtered_features = []
if min_distance > 0:
hdim1_ax, hdim2_ax = internal_utils.find_hdim_axes_3D(

Check warning on line 1423 in tobac/feature_detection.py

View check run for this annotation

Codecov / codecov/patch

tobac/feature_detection.py#L1423

Added line #L1423 was not covered by tests
field_in, vertical_coord=vertical_coord
)
hdim1_max = field_in.shape[hdim1_ax] - 1
hdim2_max = field_in.shape[hdim2_ax] - 1

Check warning on line 1427 in tobac/feature_detection.py

View check run for this annotation

Codecov / codecov/patch

tobac/feature_detection.py#L1426-L1427

Added lines #L1426 - L1427 were not covered by tests

for _, features_frame in features.groupby("frame"):
filtered_features.append(

Check warning on line 1430 in tobac/feature_detection.py

View check run for this annotation

Codecov / codecov/patch

tobac/feature_detection.py#L1429-L1430

Added lines #L1429 - L1430 were not covered by tests
filter_min_distance(
features_frame,
dxy=dxy,
dz=dz if use_dz_for_filtering else None,
min_distance=min_distance,
z_coordinate_name=(
None if use_dz_for_filtering else vertical_coord
),
target=target,
PBC_flag=PBC_flag,
min_h1=0,
max_h1=hdim1_max,
min_h2=0,
max_h2=hdim2_max,
)
)
features = pd.concat(filtered_features, ignore_index=True)

Check warning on line 1447 in tobac/feature_detection.py

View check run for this annotation

Codecov / codecov/patch

tobac/feature_detection.py#L1447

Added line #L1447 was not covered by tests

else:
features = None
logging.debug("No features detected")

logging.debug("feature detection completed")
return features

Expand Down Expand Up @@ -1512,47 +1519,47 @@
pandas DataFrame
features after filtering
"""
# Optional coordinate names are not yet implemented, set to defaults here:
if dxy is None:
raise NotImplementedError("dxy currently must be set.")

# if PBC_flag != "none":
# raise NotImplementedError("We haven't yet implemented PBCs into this.")

# if we are 3D, the vertical dimension is in features. if we are 2D, there
# is no vertical dimension in features.
is_3D = "vdim" in features

if is_3D and dz is None:
z_coordinate_name = internal_utils.find_dataframe_vertical_coord(
features, z_coordinate_name
)

# Check if both dxy and their coordinate names are specified.
# If they are, warn that we will use dxy.
if dxy is not None and (
x_coordinate_name in features and y_coordinate_name in features
):
elif x_coordinate_name in features and y_coordinate_name in features:
warnings.warn(
"Both " + x_coordinate_name + "/" + y_coordinate_name + " and dxy "
"set. Using constant dxy. Set dxy to None if you want to use the "
"interpolated coordinates, or set `x_coordinate_name` and "
"`y_coordinate_name` to None to use a constant dxy."
)
y_coordinate_name = "hdim_1"
x_coordinate_name = "hdim_2"

Check warning on line 1536 in tobac/feature_detection.py

View check run for this annotation

Codecov / codecov/patch

tobac/feature_detection.py#L1535-L1536

Added lines #L1535 - L1536 were not covered by tests
# If dxy only, use hdim_1, hdim_1 as default horizontal dimensions
else:
y_coordinate_name = "hdim_1"
x_coordinate_name = "hdim_2"

# Check and if both dz is specified and altitude is available, warn that we will use dz.
if is_3D and (dz is not None and z_coordinate_name in features):
warnings.warn(
"Both "
+ z_coordinate_name
+ " and dz available to filter_min_distance; using constant dz. "
"Set dz to none if you want to use altitude or set `z_coordinate_name` to None to use "
"constant dz."
)

# As optional coordinate names are not yet implemented, set to defaults here:
z_coordinate_name = "vdim"
y_coordinate_name = "hdim_1"
x_coordinate_name = "hdim_2"
# if we are 3D, the vertical dimension is in features
is_3D = "vdim" in features
if is_3D:
if dz is None:
# Find vertical coord name and set dz to 1
z_coordinate_name = internal_utils.find_dataframe_vertical_coord(
variable_dataframe=features, vertical_coord=z_coordinate_name
)
dz = 1
else:
# Use dz, warn if both are set
if z_coordinate_name is not None:
warnings.warn(

Check warning on line 1554 in tobac/feature_detection.py

View check run for this annotation

Codecov / codecov/patch

tobac/feature_detection.py#L1554

Added line #L1554 was not covered by tests
"Both "
+ z_coordinate_name
+ " and dz available to filter_min_distance; using constant dz. "
"Set dz to none if you want to use altitude or set `z_coordinate_name` to None to use "
"constant dz.",
UserWarning,
)
z_coordinate_name = "vdim"

if target not in ["minimum", "maximum"]:
raise ValueError(
Expand Down
50 changes: 38 additions & 12 deletions tobac/tests/test_feature_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,34 @@ def test_feature_detection_position(position_threshold):
True,
False,
),
( # Test using z coord name
(0, 0, 0, 4, 1),
(1, 1, 1, 4, 1),
1000,
None,
1,
"maximum",
False,
False,
True,
"none",
True,
True,
),
( # Test using z coord name
(0, 0, 0, 5, 1),
(1, 1, 1, 4, 1),
1,
None,
101,
"maximum",
False,
False,
True,
"none",
True,
False,
),
],
)
def test_filter_min_distance(
Expand Down Expand Up @@ -415,18 +443,6 @@ def test_filter_min_distance(

feat_combined = pd.concat([feat_1_interp, feat_2_interp], ignore_index=True)

filter_dist_opts = dict()

if add_x_coords:
feat_combined[x_coord_name] = feat_combined["hdim_2"] * assumed_dxy
filter_dist_opts["x_coordinate_name"] = x_coord_name
if add_y_coords:
feat_combined[y_coord_name] = feat_combined["hdim_1"] * assumed_dxy
filter_dist_opts["y_coordinate_name"] = y_coord_name
if add_z_coords and is_3D:
feat_combined[z_coord_name] = feat_combined["vdim"] * assumed_dz
filter_dist_opts["z_coordinate_name"] = z_coord_name

filter_dist_opts = {
"features": feat_combined,
"dxy": dxy,
Expand All @@ -439,6 +455,16 @@ def test_filter_min_distance(
"min_h2": 0,
"max_h2": 100,
}
if add_x_coords:
feat_combined[x_coord_name] = feat_combined["hdim_2"] * assumed_dxy
filter_dist_opts["x_coordinate_name"] = x_coord_name
if add_y_coords:
feat_combined[y_coord_name] = feat_combined["hdim_1"] * assumed_dxy
filter_dist_opts["y_coordinate_name"] = y_coord_name
if add_z_coords and is_3D:
feat_combined[z_coord_name] = feat_combined["vdim"] * assumed_dz
filter_dist_opts["z_coordinate_name"] = z_coord_name

if target not in ["maximum", "minimum"]:
with pytest.raises(ValueError):
out_feats = feat_detect.filter_min_distance(**filter_dist_opts)
Expand Down
Loading