diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py
index fc491ff0..c0b2c2f4 100644
--- a/tobac/feature_detection.py
+++ b/tobac/feature_detection.py
@@ -1468,7 +1468,7 @@ def filter_min_distance(
     target: {'maximum', 'minimum'}, optional
         Flag to determine if tracking is targeting minima or maxima in the
         data. Default is 'maximum'.
-    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both')
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both'), optional
         Sets whether to use periodic boundaries, and if so in which directions.
         'none' means that we do not have periodic boundaries
         'hdim_1' means that we are periodic along hdim1
         'hdim_2' means that we are periodic along hdim2
         'both' means that we are periodic along both horizontal dimensions
@@ -1540,8 +1540,8 @@ def filter_min_distance(
         feature_locations = features[
             [z_coordinate_name, y_coordinate_name, x_coordinate_name]
         ].to_numpy()
-        feature_locations[0] *= dz
-        feature_locations[1:] *= dxy
+        feature_locations[:, 0] *= dz
+        feature_locations[:, 1:] *= dxy
     else:
         feature_locations = (
             features[[y_coordinate_name, x_coordinate_name]].to_numpy() * dxy
diff --git a/tobac/merge_split.py b/tobac/merge_split.py
index 6e53eb97..012856b9 100644
--- a/tobac/merge_split.py
+++ b/tobac/merge_split.py
@@ -7,30 +7,91 @@
 """
-
-def merge_split_MEST(TRACK, dxy, distance=None, frame_len=5):
+import logging
+from typing import Optional
+from typing_extensions import Literal
+
+import numpy as np
+import pandas as pd
+import xarray as xr
+import scipy.sparse
+from sklearn.neighbors import BallTree
+
+from tobac.utils.periodic_boundaries import build_distance_function
+from tobac.utils import internal as internal_utils
+
+
+def merge_split_MEST(
+    tracks: pd.DataFrame,
+    dxy: float,
+    dz: Optional[float] = None,
+    distance: Optional[float] = None,
+    frame_len: int = 5,
+    cell_number_unassigned: int = -1,
+    vertical_coord: Optional[str] = None,
+    PBC_flag: Literal["none", "hdim_1", "hdim_2", "both"] = "none",
+    min_h1: Optional[int] = None,
+    max_h1: Optional[int] = None,
+    min_h2: Optional[int] = None,
+    max_h2: Optional[int] = None,
+) -> xr.Dataset:
     """
-    function to postprocess tobac track data for merge/split cells using a minimum euclidian spanning tree
-
+    Search for merging and splitting cells in tobac tracking data using a minimum
+    Euclidean spanning tree, and combine the merged cells into unique tracks.
 
     Parameters
     ----------
-    TRACK : pandas.core.frame.DataFrame
+    tracks : pandas.core.frame.DataFrame
         Pandas dataframe of tobac Track information
 
-    dxy : float, mandatory
+    dxy : float
         The x/y grid spacing of the data. Should be in meters.
 
+    dz : float, optional
+        Constant vertical grid spacing (m), default None. If None, the vertical
+        coord will be inferred automatically or from a specified coord given by
+        the vertical_coord parameter. An exception is raised if both dz and
+        vertical_coord are provided.
+
     distance : float, optional
-        Distance threshold determining how close two features must be in order to consider merge/splitting.
-        Default is 25x the x/y grid spacing of the data, given in dxy.
-        The distance should be in units of meters.
+        Distance threshold determining how close two features must be in order
+        to consider merge/splitting. Default is 25x the x/y grid spacing of the
+        data, given in dxy. The distance should be in units of meters.
 
     frame_len : float, optional
-        Threshold for the maximum number of frames that can separate the end of cell and the start of a related cell.
-        Default is five (5) frames.
+        Threshold for the maximum number of frames that can separate the end of
+        a cell and the start of a related cell, by default 5 frames.
+
+    cell_number_unassigned: int, optional
+        Value given to unassigned/non-tracked cells by tracking, by default -1.
+
+    vertical_coord: str, optional
+        Name of the vertical coordinate, default None. The vertical coordinate
+        used must have values in meters. If None, tries to auto-detect, or uses
+        constant vertical grid spacing if dz is specified. An exception is
+        raised if both dz and vertical_coord are provided.
+
+    PBC_flag : str('none', 'hdim_1', 'hdim_2', 'both'), optional
+        Sets whether to use periodic boundaries, and if so in which directions.
+        'none' means that we do not have periodic boundaries
+        'hdim_1' means that we are periodic along hdim1
+        'hdim_2' means that we are periodic along hdim2
+        'both' means that we are periodic along both horizontal dimensions
+
+    min_h1: int, optional
+        Minimum real point in hdim_1, for use with periodic boundaries.
+
+    max_h1: int, optional
+        Maximum point in hdim_1, exclusive. max_h1-min_h1 should be the size of
+        hdim_1.
+
+    min_h2: int, optional
+        Minimum real point in hdim_2, for use with periodic boundaries.
+
+    max_h2: int, optional
+        Maximum point in hdim_2, exclusive. max_h2-min_h2 should be the size of
+        hdim_2.
 
     Returns
     -------
@@ -45,7 +106,6 @@ def merge_split_MEST(TRACK, dxy, distance=None, frame_len=5):
     - track_child_cell_count: The total number of features belonging to all child cells of a given track id.
     - cell_child_feature_count: The total number of features for each cell.
-
     Example usage:
     d = merge_split_MEST(Track)
     ds = tobac.utils.standardize_track_dataset(Track, refl_mask)
@@ -54,172 +114,205 @@ def merge_split_MEST(TRACK, dxy, distance=None, frame_len=5):
     both_ds = xr.merge([ds, d],compat ='override')
     both_ds = tobac.utils.compress_all(both_ds)
     both_ds.to_netcdf(os.path.join(savedir,'Track_features_merges.nc'))
     """
-    try:
-        import networkx as nx
-    except ImportError:
-        networkx = None
-
-    import logging
-    import numpy as np
-    from pandas.core.common import flatten
-    import xarray as xr
-    from scipy.spatial.distance import cdist
-
-    # Immediately convert pandas dataframe of track information to xarray:
-    TRACK = TRACK.to_xarray()
-    track_groups = TRACK.groupby("cell")
+
+    track_groups = tracks[tracks["cell"] != cell_number_unassigned].groupby("cell")
     first = track_groups.first()
     last = track_groups.last()
 
     if distance is None:
         distance = dxy * 25.0
 
-    a_names = list()
-    b_names = list()
-    dist = list()
-
-    # write all sets of points (a and b) as Nx2 arrays
-    l = len(last["hdim_2"].values)
-    cells = first["cell"].values
-    a_xy = np.zeros((l, 2))
-    a_xy[:, 0] = last["hdim_2"].values * dxy
-    a_xy[:, 1] = last["hdim_1"].values * dxy
-    b_xy = np.zeros((l, 2))
-    b_xy[:, 0] = first["hdim_2"].values * dxy
-    b_xy[:, 1] = first["hdim_1"].values * dxy
-    # Use cdist to find distance matrix
-    out = cdist(a_xy, b_xy)
-    # Find all cells under the distance threshold
-    j = np.where(out <= distance)
-
-    # Compile cells meeting the criteria to an array of both the distance and cell ids
-    a_names = cells[j[0]]
-    b_names = cells[j[1]]
-    dist = out[j]
-
-    # This is inputing data to the object which will perform the spanning tree.
-    g = nx.Graph()
-    for i in np.arange(len(dist)):
-        g.add_edge(a_names[i], b_names[i], weight=dist[i])
-
-    tree = nx.minimum_spanning_edges(g)
-    tree_list = list(tree)
-
-    new_tree = []
-
-    # Pruning the tree for time limits.
-    for i, j in enumerate(tree_list):
-        frame_a = np.nanmax(track_groups[j[0]].frame.values)
-        frame_b = np.nanmin(track_groups[j[1]].frame.values)
-        if np.abs(frame_a - frame_b) <= frame_len:
-            new_tree.append(tree_list[i][0:2])
-    new_tree_arr = np.array(new_tree)
-
-    TRACK["cell_parent_track_id"] = np.zeros(len(TRACK["cell"].values))
-    cell_id = np.unique(
-        TRACK.cell.values.astype(int)[~np.isnan(TRACK.cell.values.astype(int))]
-    )
-    track_id = dict()  # same size as number of total merged tracks
-
-    # Cleaning up tracks, combining tracks which contain the same cells.
-    arr = np.array([0])
-    for p in cell_id:
-        j = np.where(arr == int(p))
-        if len(j[0]) > 0:
-            continue
+    # As optional coordinate names are not yet implemented, set to defaults here:
+    y_coordinate_name = "hdim_1"
+    x_coordinate_name = "hdim_2"
+
+    # Check if we are 3D.
+    is_3D = "vdim" in tracks
+    if is_3D:
+        if dz is None:
+            # Find vertical coord name
+            z_coordinate_name = internal_utils.find_dataframe_vertical_coord(
+                variable_dataframe=tracks, vertical_coord=vertical_coord
+            )
+            dz = 1
         else:
-            k = np.where(new_tree_arr == p)
-            if len(k[0]) == 0:
-                track_id[p] = [p]
-                arr = np.append(arr, p)
+            # Use dz, raise error if both are set
+            if vertical_coord is None:
+                z_coordinate_name = "vdim"
             else:
-                temp1 = list(np.unique(new_tree_arr[k[0]]))
-                temp = list(np.unique(new_tree_arr[k[0]]))
-
-                for l in range(len(cell_id)):
-                    for i in temp1:
-                        k2 = np.where(new_tree_arr == i)
-                        temp.append(list(np.unique(new_tree_arr[k2[0]]).squeeze()))
-                        temp = list(flatten(temp))
-                        temp = list(np.unique(temp))
-
-                    if len(temp1) == len(temp):
-                        break
-                    temp1 = np.array(temp)
-
-                for i in temp1:
-                    k2 = np.where(new_tree_arr == i)
-                    temp.append(list(np.unique(new_tree_arr[k2[0]]).squeeze()))
-
-                temp = list(flatten(temp))
-                temp = list(np.unique(temp))
-                arr = np.append(arr, np.unique(temp))
-
-                track_id[np.nanmax(np.unique(temp))] = list(np.unique(temp))
+                raise ValueError(
+                    "dz and vertical_coord both set, vertical"
+                    " spacing is ambiguous. Set one to None."
+                )
+
+    # Calculate feature locations in Cartesian coordinates
+    if is_3D:
+        cell_start_locations = np.stack(
+            [
+                first[var].values
+                for var in [z_coordinate_name, y_coordinate_name, x_coordinate_name]
+            ],
+            axis=-1,
+        )
+        cell_start_locations[:, 0] *= dz
+        cell_start_locations[:, 1:] *= dxy
+        cell_end_locations = np.stack(
+            [
+                last[var].values
+                for var in [z_coordinate_name, y_coordinate_name, x_coordinate_name]
+            ],
+            axis=-1,
+        )
+        cell_end_locations[:, 0] *= dz
+        cell_end_locations[:, 1:] *= dxy
+    else:
+        cell_start_locations = (
+            np.stack(
+                [first[var].values for var in [y_coordinate_name, x_coordinate_name]],
+                axis=-1,
+            )
+            * dxy
+        )
+        cell_end_locations = (
+            np.stack(
+                [last[var].values for var in [y_coordinate_name, x_coordinate_name]],
+                axis=-1,
+            )
+            * dxy
+        )
+
+    if PBC_flag in ["hdim_1", "hdim_2", "both"]:
+        # Note that we multiply by dxy to get the distances in spatial coordinates
+        dist_func = build_distance_function(
+            min_h1 * dxy, max_h1 * dxy, min_h2 * dxy, max_h2 * dxy, PBC_flag, is_3D
+        )
+        cell_start_tree = BallTree(
+            cell_start_locations, metric="pyfunc", func=dist_func
+        )
+
+    else:
+        cell_start_tree = BallTree(cell_start_locations, metric="euclidean")
+
+    neighbours, distances = cell_start_tree.query_radius(
+        cell_end_locations, r=distance, return_distance=True
+    )
 
-    cell_id = list(np.unique(TRACK.cell.values.astype(int)))
-    logging.debug("found cell ids")
+    # Input data to the graph which will perform the spanning tree.
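+    # query_radius returns one ragged array per cell end location, holding the
+    # indices of all cell start locations within the distance threshold; flatten
+    # these into parallel (node, neighbour, weight) edge lists for the graph.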
+    nodes = np.repeat(
+        np.arange(len(neighbours), dtype=int), [len(n) for n in neighbours]
+    )
+    neighbours = np.concatenate(neighbours)
+    weights = np.concatenate(distances)
 
-    cell_parent_track_id = np.zeros(len(cell_id))
-    cell_parent_track_id[:] = -1
+    # Remove edges where the frame gap between the end of one cell and the
+    # start of the other is greater than frame_len, and also remove
+    # connections of a cell to itself
+    wh_frame_len = (
+        np.abs(first["frame"].values[neighbours] - last["frame"].values[nodes])
+        <= frame_len
+    )
+    wh_valid_edge = np.logical_and(wh_frame_len, nodes != neighbours)
+    start_node_cells = first.index.values[neighbours[wh_valid_edge]].astype(np.int32)
+    end_node_cells = last.index.values[nodes[wh_valid_edge]].astype(np.int32)
+
+    cell_id = np.unique(tracks.cell.values)
+    cell_id = cell_id[cell_id != cell_number_unassigned].astype(int)
+    max_cell = np.max(cell_id)
+
+    if len(start_node_cells):
+        # We need to add a small value to the dists to prevent 0-length edges
+        cell_graph = scipy.sparse.coo_array(
+            (weights[wh_valid_edge] + 0.01, (start_node_cells, end_node_cells)),
+            shape=(max_cell + 1, max_cell + 1),
+        )
+        cell_graph = scipy.sparse.csgraph.minimum_spanning_tree(
+            cell_graph, overwrite=True
+        )
+        # Find remaining start/end nodes after calculating minimum spanning tree
+        start_node_cells, end_node_cells = cell_graph.nonzero()
+
+        cell_parent_track_id = scipy.sparse.csgraph.connected_components(cell_graph)[1][
+            cell_id
+        ]
+        cell_parent_track_id = (
+            np.unique(cell_parent_track_id, return_inverse=True)[1] + 1
+        )
+    else:
+        cell_parent_track_id = np.arange(cell_id.size, dtype=int) + 1
 
-    for i, id in enumerate(track_id, start=0):
-        for j in track_id[int(id)]:
-            cell_parent_track_id[cell_id.index(j)] = int(i)
+    track_dim = "track"
+    cell_dim = "cell"
+    feature_dim = "feature"
 
+    cell_parent_track_id = xr.DataArray(
+        cell_parent_track_id, dims=(cell_dim,), coords={cell_dim: cell_id}
+    )
     logging.debug("found cell parent track ids")
 
-    track_ids = np.array(np.unique(cell_parent_track_id))
+    track_id = np.unique(cell_parent_track_id)
     logging.debug("found track ids")
 
-    feature_parent_cell_id = list(TRACK.cell.values.astype(int))
-    logging.debug("found feature parent cell ids")
-
-    # # This version includes all the feature regardless of if they are used in cells or not.
-    feature_id = list(TRACK.feature.values.astype(int))
+    # This version includes all features, regardless of whether they are used in cells or not.
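+    # Features that tracking left unassigned keep cell_number_unassigned
+    # (default -1) as their parent cell and track ids below.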
+    feature_id = tracks.feature.values.astype(int)
     logging.debug("found feature ids")
 
-    feature_parent_track_id = []
-    feature_parent_track_id = np.zeros(len(feature_id))
-    for i, id in enumerate(feature_id):
-        cellid = feature_parent_cell_id[i]
-        if cellid < 0:
-            feature_parent_track_id[i] = -1
-        else:
-            feature_parent_track_id[i] = cell_parent_track_id[cell_id.index(cellid)]
+    feature_parent_cell_id = tracks.cell.values.astype(int)
+    feature_parent_cell_id = xr.DataArray(
+        feature_parent_cell_id,
+        dims=(feature_dim,),
+        coords={feature_dim: feature_id},
+    )
+    logging.debug("found feature parent cell ids")
 
-    track_child_cell_count = np.zeros(len(track_id))
-    for i, id in enumerate(track_id):
-        track_child_cell_count[i] = len(np.where(cell_parent_track_id == i)[0])
-    logging.debug("found track child cell count")
+    wh_feature_in_cell = (feature_parent_cell_id != cell_number_unassigned).values
+    feature_parent_track_id = np.full(wh_feature_in_cell.shape, cell_number_unassigned)
+    feature_parent_track_id[wh_feature_in_cell] = cell_parent_track_id.loc[
+        feature_parent_cell_id[wh_feature_in_cell]
+    ].values
+    feature_parent_track_id = xr.DataArray(
+        feature_parent_track_id,
+        dims=(feature_dim,),
+        coords={feature_dim: feature_id},
+    )
 
-    cell_child_feature_count = np.zeros(len(cell_id))
-    for i, id in enumerate(cell_id):
-        cell_child_feature_count[i] = len(track_groups[id].feature.values)
-    logging.debug("found cell child feature count")
+    track_child_cell_count = (
+        cell_parent_track_id.groupby(cell_parent_track_id).reduce(np.size).values
+    )
+    track_child_cell_count = xr.DataArray(
+        track_child_cell_count,
+        dims=(track_dim,),
+        coords={track_dim: track_id},
+    )
 
-    track_dim = "track"
-    cell_dim = "cell"
-    feature_dim = "feature"
+    cell_child_feature_count = (
+        feature_parent_cell_id[wh_feature_in_cell]
+        .groupby(feature_parent_cell_id[wh_feature_in_cell])
+        .reduce(np.size)
+        .values
+    )
+    cell_child_feature_count = xr.DataArray(
+        cell_child_feature_count, dims=(cell_dim,), coords={cell_dim: cell_id}
+    )
 
-    d = xr.Dataset(
-        {
-            "track": (track_dim, track_ids),
-            "cell": (cell_dim, cell_id),
-            "cell_parent_track_id": (cell_dim, cell_parent_track_id),
-            "feature": (feature_dim, feature_id),
-            "feature_parent_cell_id": (feature_dim, feature_parent_cell_id),
-            "feature_parent_track_id": (feature_dim, feature_parent_track_id),
-            "track_child_cell_count": (track_dim, track_child_cell_count),
-            "cell_child_feature_count": (cell_dim, cell_child_feature_count),
-        }
+    cell_starts_with_split = np.isin(cell_id, start_node_cells)
+    cell_starts_with_split = xr.DataArray(
+        cell_starts_with_split, dims=(cell_dim,), coords={cell_dim: cell_id}
     )
-    d = d.set_coords(["feature", "cell", "track"])
+    cell_ends_with_merge = np.isin(cell_id, end_node_cells)
+    cell_ends_with_merge = xr.DataArray(
+        cell_ends_with_merge, dims=(cell_dim,), coords={cell_dim: cell_id}
+    )
 
-    # assert len(cell_id) == len(cell_parent_track_id)
-    # assert len(feature_id) == len(feature_parent_cell_id)
-    # assert sum(track_child_cell_count) == len(cell_id)
-    # assert sum(cell_child_feature_count) == len(feature_id)
+    merge_split_ds = xr.Dataset(
+        data_vars={
+            "cell_parent_track_id": cell_parent_track_id,
+            "feature_parent_cell_id": feature_parent_cell_id,
+            "feature_parent_track_id": feature_parent_track_id,
+            "track_child_cell_count": track_child_cell_count,
+            "cell_child_feature_count": cell_child_feature_count,
+            "cell_starts_with_split": cell_starts_with_split,
+            "cell_ends_with_merge": cell_ends_with_merge,
+        },
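+        # Every variable above is indexed by one of these three coordinates,
+        # so the output can be queried per feature, per cell, or per track: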
+        coords={feature_dim: feature_id, cell_dim: cell_id, track_dim: track_id},
+    )
 
-    return d
+    return merge_split_ds
diff --git a/tobac/tests/test_merge_split.py b/tobac/tests/test_merge_split.py
index 83200192..8b3ecdb4 100644
--- a/tobac/tests/test_merge_split.py
+++ b/tobac/tests/test_merge_split.py
@@ -3,6 +3,7 @@
 
 import pandas as pd
 import numpy as np
+import pytest
 import datetime
 
@@ -78,7 +79,7 @@ def test_merge_split_MEST():
 
     # Test a successful merger
     mergesplit_output_merged = mergesplit.merge_split_MEST(
-        output, dxy=1, distance=dist_between + 50
+        output, dxy=10, distance=(dist_between + 50) * 10
     )
 
     # These cells should have merged together.
@@ -86,9 +87,256 @@
     # Test an unsuccessful merger.
     mergesplit_output_unmerged = mergesplit.merge_split_MEST(
-        output, dxy=1, distance=dist_between - 50
+        output, dxy=10, distance=(dist_between - 50) * 10
     )
 
     # These cells should NOT have merged together.
     print(mergesplit_output_unmerged["track"])
     assert len(mergesplit_output_unmerged["track"]) == 2
+
+
+def test_merge_split_MEST_PBC():
+    """
+    Test PBC handling for merge_split_MEST
+    """
+    test_features = pd.DataFrame(
+        {
+            "feature": [1, 2, 3, 4],
+            "hdim_1": [1, 89, 1, 99],
+            "hdim_2": [50, 50, 50, 50],
+            "cell": [1, 2, 1, 2],
+            "frame": [0, 0, 1, 1],
+            "time": [
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1, 0, 5),
+                datetime.datetime(2000, 1, 1, 0, 5),
+            ],
+        }
+    )
+    # Test without PBCs
+    mergesplit_output_no_pbc = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=25,
+    )
+
+    assert len(mergesplit_output_no_pbc["track"]) == 2
+
+    # Test with PBC in hdim_1, cells should merge
+    mergesplit_output_hdim_1_pbc = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=25,
+        PBC_flag="hdim_1",
+        min_h1=0,
+        max_h1=100,
+        min_h2=0,
+        max_h2=100,
+    )
+
+    assert len(mergesplit_output_hdim_1_pbc["track"]) == 1
+
+    # Test with PBC in hdim_2, cells should not merge
+    mergesplit_output_hdim_2_pbc = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=25,
+        PBC_flag="hdim_2",
+        min_h1=0,
+        max_h1=100,
+        min_h2=0,
+        max_h2=100,
+    )
+
+    assert len(mergesplit_output_hdim_2_pbc["track"]) == 2
+
+
+def test_merge_split_MEST_frame_len():
+    """
+    Test the frame_len parameter of merge_split_MEST
+    """
+    test_features = pd.DataFrame(
+        {
+            "feature": [1, 2, 3, 4],
+            "hdim_1": [50, 50, 50, 50],
+            "hdim_2": [50, 50, 50, 50],
+            "cell": [1, 1, 2, 2],
+            "frame": [0, 1, 3, 4],
+            "time": [
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1, 0, 5),
+                datetime.datetime(2000, 1, 1, 0, 5),
+            ],
+        }
+    )
+
+    # Test with short frame_len, expect no link
+    mergesplit_output = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=25,
+        frame_len=1,
+    )
+    assert len(mergesplit_output["track"]) == 2
+
+    # Test with longer frame_len, expect link
+    mergesplit_output = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=25,
+        frame_len=2,
+    )
+    assert len(mergesplit_output["track"]) == 1
+
+
+def test_merge_split_MEST_no_cell():
+    """
+    Test merge/split in cases where features are not assigned to a cell
+    """
+    test_features = pd.DataFrame(
+        {
+            "feature": [1, 2, 3],
+            "hdim_1": [25, 30, 50],
+            "hdim_2": [25, 30, 50],
+            "cell": [1, -1, 1],
+            "frame": [0, 0, 1],
+            "time": [
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1, 0, 5),
+            ],
+        }
+    )
+
+    mergesplit_output = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=25,
+    )
+
+    assert len(mergesplit_output["track"]) == 1
+
+    assert mergesplit_output["feature_parent_cell_id"].values[1] == -1
+
+
+def test_merge_split_MEST_3D():
+    """
+    Test merge/split support for 3D tracks and dz input
+    """
+
+    test_features = pd.DataFrame(
+        {
+            "feature": [1, 2, 3, 4],
+            "vdim": [1, 2, 1, 2],
+            "hdim_1": [50, 40, 50, 40],
+            "hdim_2": [50, 50, 50, 50],
+            "cell": [1, 2, 1, 2],
+            "frame": [0, 0, 1, 1],
+            "altitude": [500, 750, 1500, 2000],
+            "time": [
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1, 0, 5),
+                datetime.datetime(2000, 1, 1, 0, 5),
+            ],
+        }
+    )
+
+    # Test with dz=10, expect merge
+    mergesplit_output_3d_merge = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        dz=10,
+        distance=20,
+    )
+
+    assert len(mergesplit_output_3d_merge["track"]) == 1
+
+    # Test with dz=25, expect no merge
+    mergesplit_output_3d_nomerge = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        dz=25,
+        distance=20,
+    )
+
+    assert len(mergesplit_output_3d_nomerge["track"]) == 2
+
+    # Test providing vertical_coord
+    mergesplit_output_3d_coord_merge = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        vertical_coord="altitude",
+        distance=1100,
+    )
+    mergesplit_output_3d_coord_nomerge = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        vertical_coord="altitude",
+        distance=20,
+    )
+
+    assert len(mergesplit_output_3d_coord_merge["track"]) == 1
+    assert len(mergesplit_output_3d_coord_nomerge["track"]) == 2
+
+    # Test auto find vertical_coord
+    mergesplit_output_3d_coord_merge = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=1100,
+    )
+    mergesplit_output_3d_coord_nomerge = mergesplit.merge_split_MEST(
+        test_features,
+        dxy=1,
+        distance=20,
+    )
+
+    assert len(mergesplit_output_3d_coord_merge["track"]) == 1
+    assert len(mergesplit_output_3d_coord_nomerge["track"]) == 2
+
+    # Test error if both dz and coord are provided
+    with pytest.raises(ValueError):
+        mergesplit.merge_split_MEST(
+            test_features,
+            dxy=1,
+            dz=1,
+            vertical_coord="auto",
+            distance=20,
+        )
+
+    # Test that wrong vertical coord name causes an error
+    with pytest.raises(ValueError):
+        mergesplit.merge_split_MEST(
+            test_features,
+            dxy=1,
+            vertical_coord="invalid_coord_name",
+            distance=20,
+        )
+
+    # Test that auto search fails if coordinate name is not in default list
+    test_features = pd.DataFrame(
+        {
+            "feature": [1, 2, 3, 4],
+            "vdim": [1, 2, 1, 2],
+            "hdim_1": [50, 40, 50, 40],
+            "hdim_2": [50, 50, 50, 50],
+            "cell": [1, 2, 1, 2],
+            "frame": [0, 0, 1, 1],
+            "invalid_coord_name": [500, 1500, 1000, 2000],
+            "time": [
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1),
+                datetime.datetime(2000, 1, 1, 0, 5),
+                datetime.datetime(2000, 1, 1, 0, 5),
+            ],
+        }
+    )
+
+    with pytest.raises(ValueError):
+        mergesplit.merge_split_MEST(
+            test_features,
+            dxy=1,
+            distance=20,
+        )
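As a usage sketch for the rewritten function (not part of the changeset itself): the call below assumes a dataframe tracks_df produced by tobac tracking, with "feature", "frame", "cell", "hdim_1" and "hdim_2" columns, a 500 m grid spacing, and a 200x200 periodic domain; all of these values are illustrative placeholders.

    from tobac.merge_split import merge_split_MEST

    merges = merge_split_MEST(
        tracks_df,
        dxy=500.0,  # grid spacing in meters
        distance=10000.0,  # link a cell end to a cell start within 10 km
        frame_len=3,  # allow up to 3 frames between an end and a start
        PBC_flag="both",  # wrap distances in both horizontal dimensions
        min_h1=0,
        max_h1=200,
        min_h2=0,
        max_h2=200,
    )

    # Cells sharing a value of cell_parent_track_id belong to one merged track,
    # and the boolean cell variables flag where merges/splits were detected:
    print(merges["cell_parent_track_id"])
    print(merges["cell_ends_with_merge"])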