Skip to content

Commit 509a17c

Browse files
authored
Merge pull request #361 from freemansw1/add_tests_time_cell_min
Add tests for `time_cell_min` and update documentation for the parameter
2 parents f1d3a4e + c0c55d4 commit 509a17c

File tree

4 files changed

+223
-144
lines changed

4 files changed

+223
-144
lines changed

tobac/feature_detection.py

Lines changed: 15 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -423,14 +423,14 @@ def feature_detection_threshold(
423423
Standard deviation for initial filtering step. Default is 0.5.
424424
425425
n_erosion_threshold: int, optional
426-
Number of pixel by which to erode the identified features.
426+
Number of pixels by which to erode the identified features.
427427
Default is 0.
428428
429429
n_min_threshold : int, optional
430-
Minimum number of identified features. Default is 0.
430+
Minimum number of identified contiguous pixels for a feature to be detected. Default is 0.
431431
432432
min_distance : float, optional
433-
Minimum distance between detected features (in meter). Default is 0.
433+
Minimum distance between detected features (in meters). Default is 0.
434434
435435
idx_start : int, optional
436436
Feature id to start with. Default is 0.
@@ -939,14 +939,14 @@ def feature_detection_multithreshold_timestep(
939939
Standard deviation for initial filtering step. Default is 0.5.
940940
941941
n_erosion_threshold: int, optional
942-
Number of pixel by which to erode the identified features.
942+
Number of pixels by which to erode the identified features.
943943
Default is 0.
944944
945945
n_min_threshold : int, optional
946-
Minimum number of identified features. Default is 0.
946+
Minimum number of identified contiguous pixels for a feature to be detected. Default is 0.
947947
948948
min_distance : float, optional
949-
Minimum distance between detected features (in meter). Default is 0.
949+
Minimum distance between detected features (in meters). Default is 0.
950950
951951
feature_number_start : int, optional
952952
Feature id to start with. Default is 1.
@@ -961,10 +961,10 @@ def feature_detection_multithreshold_timestep(
961961
vertical_axis: int
962962
The vertical axis number of the data.
963963
dxy : float
964-
Grid spacing in meter.
964+
Grid spacing in meters.
965965
966966
wavelength_filtering: tuple, optional
967-
Minimum and maximum wavelength for spectral filtering in meter. Default is None.
967+
Minimum and maximum wavelength for spectral filtering in meters. Default is None.
968968
969969
strict_thresholding: Bool, optional
970970
If True, a feature can only be detected if all previous thresholds have been met.
@@ -1174,25 +1174,18 @@ def feature_detection_multithreshold(
11741174
Flag choosing method used for the position of the tracked
11751175
feature. Default is 'center'.
11761176
1177-
coord_interp_kind : str, optional
1178-
The kind of interpolation for coordinates. Default is 'linear'.
1179-
For 1d interp, {'linear', 'nearest', 'nearest-up', 'zero',
1180-
'slinear', 'quadratic', 'cubic',
1181-
'previous', 'next'}.
1182-
For 2d interp, {'linear', 'cubic', 'quintic'}.
1183-
11841177
sigma_threshold: float, optional
11851178
Standard deviation for initial filtering step. Default is 0.5.
11861179
11871180
n_erosion_threshold: int, optional
1188-
Number of pixel by which to erode the identified features.
1181+
Number of pixels by which to erode the identified features.
11891182
Default is 0.
11901183
11911184
n_min_threshold : int, optional
1192-
Minimum number of identified features. Default is 0.
1185+
Minimum number of identified contiguous pixels for a feature to be detected. Default is 0.
11931186
11941187
min_distance : float, optional
1195-
Minimum distance between detected features (in meter). Default is 0.
1188+
Minimum distance between detected features (in meters). Default is 0.
11961189
11971190
feature_number_start : int, optional
11981191
Feature id to start with. Default is 1.
@@ -1441,15 +1434,15 @@ def filter_min_distance(
14411434
features: pandas DataFrame
14421435
features
14431436
dxy: float
1444-
Constant horzontal grid spacing (m).
1437+
Constant horizontal grid spacing (meters).
14451438
dz: float
1446-
Constant vertical grid spacing (m), optional. If not specified
1439+
Constant vertical grid spacing (meters), optional. If not specified
14471440
and the input is 3D, this function requires that `z_coordinate_name` is available
14481441
in the `features` input. If you specify a value here, this function assumes
14491442
that it is the constant z spacing between points, even if `z_coordinate_name`
14501443
is specified.
14511444
min_distance: float
1452-
minimum distance between detected features (m)
1445+
minimum distance between detected features (meters)
14531446
x_coordinate_name: str
14541447
The name of the x coordinate to calculate distance based on in meters.
14551448
This is typically `projection_x_coordinate`. Currently unused.
@@ -1460,7 +1453,7 @@ def filter_min_distance(
14601453
The name of the z coordinate to calculate distance based on in meters.
14611454
This is typically `altitude`. If None, tries to auto-detect.
14621455
target: {'maximum', 'minimum'}, optional
1463-
Flag to determine if tracking is targetting minima or maxima in
1456+
Flag to determine if tracking is targeting minima or maxima in
14641457
the data. Default is 'maximum'.
14651458
PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
14661459
Sets whether to use periodic boundaries, and if so in which directions.
@@ -1477,7 +1470,6 @@ def filter_min_distance(
14771470
max_h2: int, optional
14781471
Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size.
14791472
1480-
14811473
Returns
14821474
-------
14831475
pandas DataFrame

tobac/segmentation.py

Lines changed: 101 additions & 104 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ def add_markers(
7070
or a box of user-set size
7171
seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
7272
This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
73-
integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
74-
seed area for each dimension separately.
75-
Note: we recommend the use of odd numbers for this. If you give
73+
integer (units of number of pixels), the seed box is identical in all dimensions.
74+
If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
75+
Note: we strongly recommend the use of odd numbers for this. If you give
7676
an even number, your seed box will be biased and not centered
7777
around the feature.
7878
Note: if two seed boxes overlap, the feature that is seeded will be the
@@ -366,7 +366,7 @@ def segmentation_timestep(
366366
Default is 3e-3.
367367
368368
target : {'maximum', 'minimum'}, optional
369-
Flag to determine if tracking is targetting minima or maxima in
369+
Flag to determine if tracking is targeting minima or maxima in
370370
the data to determine from which direction to approach the threshold
371371
value. Default is 'maximum'.
372372
@@ -376,11 +376,11 @@ def segmentation_timestep(
376376
377377
method : {'watershed'}, optional
378378
Flag determining the algorithm to use (currently watershedding
379-
implemented). 'random_walk' could be uncommented.
379+
implemented).
380380
381381
max_distance : float, optional
382382
Maximum distance from a marker allowed to be classified as
383-
belonging to that cell. Default is None.
383+
belonging to that cell in meters. Default is None.
384384
385385
vertical_coord : str, optional
386386
Vertical coordinate in 3D input data. If None, input is checked for
@@ -398,14 +398,18 @@ def segmentation_timestep(
398398
or a box of user-set size
399399
seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
400400
This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
401-
integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
402-
seed area for each dimension separately. Note: we recommend the use
403-
of odd numbers for this. If you give an even number, your seed box will be
404-
biased and not centered around the feature.
401+
integer (units of number of pixels), the seed box is identical in all dimensions.
402+
If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
403+
Note: we strongly recommend the use of odd numbers for this. If you give
404+
an even number, your seed box will be biased and not centered
405+
around the feature.
406+
Note: if two seed boxes overlap, the feature that is seeded will be the
407+
closer feature.
405408
segment_number_below_threshold: int
406409
the marker to use to indicate a segmentation point is below the threshold.
407410
segment_number_unassigned: int
408411
the marker to use to indicate a segmentation point is above the threshold but unsegmented.
412+
This can be the same as `segment_number_below_threshold`, but can also be set separately.
409413
statistics: boolean, optional
410414
Default is None. If True, bulk statistics for the data points assigned to each feature are saved in output.
411415
@@ -1129,100 +1133,93 @@ def segmentation(
11291133
statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None,
11301134
) -> tuple[iris.cube.Cube, pd.DataFrame]:
11311135
"""Use watershedding to determine region above a threshold
1132-
value around initial seeding position for all time steps of
1133-
the input data. Works both in 2D (based on single seeding
1134-
point) and 3D and returns a mask with zeros everywhere around
1135-
the identified regions and the feature id inside the regions.
1136-
1137-
Calls segmentation_timestep at each individal timestep of the
1138-
input data.
1139-
1140-
Parameters
1141-
----------
1142-
features : pandas.DataFrame
1143-
Output from trackpy/maketrack.
1144-
1145-
field : iris.cube.Cube
1146-
Containing the field to perform the watershedding on.
1147-
1148-
dxy : float
1149-
Grid spacing of the input data.
1150-
1151-
statistic : dict, optional
1152-
Default is None. Optional parameter to calculate bulk statistics within feature detection.
1153-
Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the values and the names of the metric the keys (e.g. {'mean': np.mean})
1154-
1155-
boolean, optional
1156-
Default is False. If True, bulk statistics for the data points assigned to each feature are saved in output.
1157-
1158-
Output:
1159-
segmentation_out: iris.cube.Cube
1160-
Cloud mask, 0 outside and integer numbers according to track inside the cloud
1161-
=======
1162-
threshold : float, optional
1163-
Threshold for the watershedding field to be used for the mask. The watershedding is exclusive of the threshold value, i.e. values greater (less) than the threshold are included in the target region, while values equal to the threshold value are excluded.
1164-
Default is 3e-3.
1165-
1166-
target : {'maximum', 'minimum'}, optional
1167-
Flag to determine if tracking is targetting minima or maxima in
1168-
the data. Default is 'maximum'.
1169-
1170-
level : slice of iris.cube.Cube, optional
1171-
Levels at which to seed the cells for the watershedding
1172-
algorithm. Default is None.
1173-
1174-
method : {'watershed'}, optional
1175-
Flag determining the algorithm to use (currently watershedding
1176-
implemented). 'random_walk' could be uncommented.
1177-
1178-
max_distance : float, optional
1179-
Maximum distance from a marker allowed to be classified as
1180-
belonging to that cell. Default is None.
1181-
1182-
vertical_coord : {'auto', 'z', 'model_level_number', 'altitude',
1183-
'geopotential_height'}, optional
1184-
Name of the vertical coordinate for use in 3D segmentation case
1185-
1186-
PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
1187-
Sets whether to use periodic boundaries, and if so in which directions.
1188-
'none' means that we do not have periodic boundaries
1189-
'hdim_1' means that we are periodic along hdim1
1190-
'hdim_2' means that we are periodic along hdim2
1191-
'both' means that we are periodic along both horizontal dimensions
1192-
1193-
seed_3D_flag: str('column', 'box')
1194-
Seed 3D field at feature positions with either the full column (default)
1195-
or a box of user-set size
1196-
1197-
seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
1198-
This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
1199-
integer, the seed box is identical in all dimensions. If it's a tuple, it specifies the
1200-
seed area for each dimension separately. Note: we recommend the use
1201-
of odd numbers for this. If you give an even number, your seed box will be
1202-
biased and not centered around the feature.
1203-
segment_number_below_threshold: int
1204-
the marker to use to indicate a segmentation point is below the threshold.
1205-
segment_number_unassigned: int
1206-
the marker to use to indicate a segmentation point is above the threshold but unsegmented.
1207-
statistic: boolean, optional
1208-
Default is False. If True, bulk statistics for the data points assigned to each feature are saved in output.
1209-
1210-
1211-
Returns
1212-
-------
1213-
segmentation_out : iris.cube.Cube
1214-
Mask, 0 outside and integer numbers according to track
1215-
inside the area/volume of the feature.
1216-
1217-
features_out : pandas.DataFrame
1218-
Feature dataframe including the number of cells (2D or 3D) in
1219-
the segmented area/volume of the feature at the timestep.
1220-
1221-
Raises
1222-
------
1223-
ValueError
1224-
If field_in.ndim is neither 3 nor 4 and 'time' is not included
1225-
in coords.
1136+
value around initial seeding position for all time steps of
1137+
the input data. Works both in 2D (based on single seeding
1138+
point) and 3D and returns a mask with zeros everywhere around
1139+
the identified regions and the feature id inside the regions.
1140+
1141+
Calls segmentation_timestep at each individual timestep of the
1142+
input data.
1143+
1144+
Parameters
1145+
----------
1146+
features : pandas.DataFrame
1147+
Output from trackpy/maketrack.
1148+
1149+
field : iris.cube.Cube
1150+
Containing the field to perform the watershedding on.
1151+
1152+
dxy : float
1153+
Grid spacing of the input data in meters.
1154+
1155+
threshold : float, optional
1156+
Threshold for the watershedding field to be used for the mask.
1157+
Default is 3e-3.
1158+
1159+
target : {'maximum', 'minimum'}, optional
1160+
Flag to determine if tracking is targeting minima or maxima in
1161+
the data. Default is 'maximum'.
1162+
1163+
level : slice of iris.cube.Cube, optional
1164+
Levels at which to seed the cells for the watershedding
1165+
algorithm. Default is None.
1166+
1167+
method : {'watershed'}, optional
1168+
Flag determining the algorithm to use (currently watershedding
1169+
implemented). 'random_walk' could be uncommented.
1170+
1171+
max_distance : float, optional
1172+
Maximum distance from a marker allowed to be classified as
1173+
belonging to that cell in meters. Default is None.
1174+
1175+
vertical_coord : {'auto', 'z', 'model_level_number', 'altitude',
1176+
'geopotential_height'}, optional
1177+
Name of the vertical coordinate for use in 3D segmentation case
1178+
1179+
PBC_flag : {'none', 'hdim_1', 'hdim_2', 'both'}
1180+
Sets whether to use periodic boundaries, and if so in which directions.
1181+
'none' means that we do not have periodic boundaries
1182+
'hdim_1' means that we are periodic along hdim1
1183+
'hdim_2' means that we are periodic along hdim2
1184+
'both' means that we are periodic along both horizontal dimensions
1185+
1186+
seed_3D_flag: str('column', 'box')
1187+
Seed 3D field at feature positions with either the full column (default)
1188+
or a box of user-set size
1189+
1190+
seed_3D_size: int or tuple (dimensions equal to dimensions of `field`)
1191+
This sets the size of the seed box when `seed_3D_flag` is 'box'. If it's an
1192+
integer (units of number of pixels), the seed box is identical in all dimensions.
1193+
If it's a tuple, it specifies the seed area for each dimension separately, in units of pixels.
1194+
Note: we strongly recommend the use of odd numbers for this. If you give
1195+
an even number, your seed box will be biased and not centered
1196+
around the feature.
1197+
Note: if two seed boxes overlap, the feature that is seeded will be the
1198+
closer feature.
1199+
segment_number_below_threshold: int
1200+
the marker to use to indicate a segmentation point is below the threshold.
1201+
segment_number_unassigned: int
1202+
the marker to use to indicate a segmentation point is above the threshold but unsegmented.
1203+
statistic : dict, optional
1204+
Default is None. Optional parameter to calculate bulk statistics within feature detection.
1205+
Dictionary with callable function(s) to apply over the region of each detected feature and the name of the statistics to appear in the feature output dataframe. The functions should be the dictionary values and the metric names the keys (e.g. {'mean': np.mean})
1206+
1207+
1208+
Returns
1209+
-------
1210+
segmentation_out : iris.cube.Cube
1211+
Mask, 0 outside and integer numbers according to track
1212+
inside the area/volume of the feature.
1213+
1214+
features_out : pandas.DataFrame
1215+
Feature dataframe including the number of cells (2D or 3D) in
1216+
the segmented area/volume of the feature at the timestep.
1217+
1218+
Raises
1219+
------
1220+
ValueError
1221+
If field_in.ndim is neither 3 nor 4 and 'time' is not included
1222+
in coords.
12261223
"""
12271224
import pandas as pd
12281225
from iris.cube import CubeList

0 commit comments

Comments
 (0)