From 0e2b1c39095a28bd182953e2551c18d9aa03c1e4 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sun, 10 Sep 2023 21:37:38 -0500 Subject: [PATCH 01/32] begun refactor of internal utils --- tobac/utils/internal/__init__.py | 1 + .../general_internal.py} | 64 ++++++++------- tobac/utils/internal/iris_utils.py | 82 +++++++++++++++++++ 3 files changed, 116 insertions(+), 31 deletions(-) create mode 100644 tobac/utils/internal/__init__.py rename tobac/utils/{internal.py => internal/general_internal.py} (96%) create mode 100644 tobac/utils/internal/iris_utils.py diff --git a/tobac/utils/internal/__init__.py b/tobac/utils/internal/__init__.py new file mode 100644 index 00000000..463d824c --- /dev/null +++ b/tobac/utils/internal/__init__.py @@ -0,0 +1 @@ +from .general_internal import * diff --git a/tobac/utils/internal.py b/tobac/utils/internal/general_internal.py similarity index 96% rename from tobac/utils/internal.py rename to tobac/utils/internal/general_internal.py index 1354504f..6f90ffc0 100644 --- a/tobac/utils/internal.py +++ b/tobac/utils/internal/general_internal.py @@ -4,7 +4,10 @@ import skimage.measure import xarray as xr import iris +import iris.cube import warnings +from . import iris_utils +from typing import Union def _warn_auto_coordinate(): @@ -498,36 +501,6 @@ def find_vertical_axis_from_coord(variable_cube, vertical_coord=None): raise ValueError("Please specify vertical coordinate found in cube") -def find_axis_from_coord(variable_cube, coord_name): - """Finds the axis number in an iris cube given a coordinate name. - - Parameters - ---------- - variable_cube: iris.cube - Input variable cube - coord_name: str - coordinate to look for - - Returns - ------- - axis_number: int - the number of the axis of the given coordinate, or None if the coordinate - is not found in the cube or not a dimensional coordinate - """ - - list_coord_names = [coord.name() for coord in variable_cube.coords()] - all_matching_axes = list(set(list_coord_names) & set((coord_name,))) - if ( - len(all_matching_axes) == 1 - and len(variable_cube.coord_dims(all_matching_axes[0])) > 0 - ): - return variable_cube.coord_dims(all_matching_axes[0])[0] - elif len(all_matching_axes) > 1: - raise ValueError("Too many axes matched.") - else: - return None - - def find_dataframe_vertical_coord(variable_dataframe, vertical_coord=None): """Function to find the vertical coordinate in the iris cube @@ -633,6 +606,35 @@ def find_hdim_axes_3D(field_in, vertical_coord=None, vertical_axis=None): raise ValueError("Unknown data type: " + type(field_in).__name__) +def find_axis_from_coord( + variable_arr: Union[iris.cube.Cube, xr.DataArray], coord_name: str +): + """Finds the axis number in an xarray or iris cube given a coordinate or dimension name. + + Parameters + ---------- + variable_arr: iris.cube.Cube or xarray.DataArray + Input variable cube + coord_name: str + coordinate or dimension to look for + + Returns + ------- + axis_number: int + the number of the axis of the given coordinate, or None if the coordinate + is not found in the variable or not a dimensional coordinate + """ + + if isinstance(variable_arr, iris.cube.Cube): + return iris_utils.find_axis_from_coord(variable_arr, coord_name) + elif isinstance(variable_arr, xr.DataArray): + raise NotImplementedError( + "xarray version of find_axis_from_coord not implemented." 
+ ) + else: + raise ValueError("variable_arr must be Iris Cube or Xarray DataArray") + + def find_hdim_axes_3D_iris(field_in, vertical_coord=None, vertical_axis=None): """Finds what the hdim axes are given a 3D (including z) or 4D (including z and time) dataset. @@ -661,7 +663,7 @@ def find_hdim_axes_3D_iris(field_in, vertical_coord=None, vertical_axis=None): if vertical_coord != "auto": raise ValueError("Cannot set both vertical_coord and vertical_axis.") - time_axis = find_axis_from_coord(field_in, "time") + time_axis = iris_utils.find_axis_from_coord(field_in, "time") if vertical_axis is not None: vertical_coord_axis = vertical_axis vert_coord_found = True diff --git a/tobac/utils/internal/iris_utils.py b/tobac/utils/internal/iris_utils.py new file mode 100644 index 00000000..f62d8729 --- /dev/null +++ b/tobac/utils/internal/iris_utils.py @@ -0,0 +1,82 @@ +"""Internal tobac utilities for iris cubes +The goal will be to, ultimately, remove these when we sunset iris +""" + +from typing import Union +import iris +import iris.cube + + +def find_axis_from_coord(variable_cube: iris.cube.Cube, coord_name: str): + """Finds the axis number in an iris cube given a coordinate name. + + Parameters + ---------- + variable_cube: iris.cube + Input variable cube + coord_name: str + coordinate to look for + + Returns + ------- + axis_number: int + the number of the axis of the given coordinate, or None if the coordinate + is not found in the cube or not a dimensional coordinate + """ + + list_coord_names = [coord.name() for coord in variable_cube.coords()] + all_matching_axes = list(set(list_coord_names) & {coord_name}) + if ( + len(all_matching_axes) == 1 + and len(variable_cube.coord_dims(all_matching_axes[0])) > 0 + ): + return variable_cube.coord_dims(all_matching_axes[0])[0] + if len(all_matching_axes) > 1: + raise ValueError("Too many axes matched.") + + return None + + +def find_vertical_axis_from_coord( + variable_cube: iris.cube.Cube, vertical_coord: Union[str, None] = None +): + """Function to find the vertical coordinate in the iris cube + + Parameters + ---------- + variable_cube: iris.cube + Input variable cube, containing a vertical coordinate. + vertical_coord: str + Vertical coordinate name. If None, this function tries to auto-detect. + + Returns + ------- + str + the vertical coordinate name + + Raises + ------ + ValueError + Raised if the vertical coordinate isn't found in the cube. 
+ """ + list_vertical = [ + "z", + "model_level_number", + "altitude", + "geopotential_height", + ] + + list_coord_names = [coord.name() for coord in variable_cube.coords()] + + if vertical_coord is None or vertical_coord == "auto": + # find the intersection + all_vertical_axes = list(set(list_coord_names) & set(list_vertical)) + if len(all_vertical_axes) >= 1: + return all_vertical_axes[0] + raise ValueError( + "Cube lacks suitable automatic vertical coordinate (z, model_level_number, altitude, " + "or geopotential_height)" + ) + if vertical_coord in list_coord_names: + return vertical_coord + raise ValueError("Please specify vertical coordinate found in cube") From f12bd72c52cfad6a43d59b4a2f0bc5dd4aed556f Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 11 Sep 2023 07:14:52 -0500 Subject: [PATCH 02/32] add new xarray utils file for xarray functions --- tobac/utils/internal/general_internal.py | 9 ++++++--- tobac/utils/internal/xarray_utils.py | 5 +++++ 2 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 tobac/utils/internal/xarray_utils.py diff --git a/tobac/utils/internal/general_internal.py b/tobac/utils/internal/general_internal.py index 6f90ffc0..e9d1d415 100644 --- a/tobac/utils/internal/general_internal.py +++ b/tobac/utils/internal/general_internal.py @@ -449,12 +449,15 @@ def njit_if_available(func, **kwargs): return func -def find_vertical_axis_from_coord(variable_cube, vertical_coord=None): +def find_vertical_axis_from_coord( + variable_cube: Union[iris.cube.Cube, xr.DataArray], + vertical_coord: Union[str, None] = None, +): """Function to find the vertical coordinate in the iris cube Parameters ---------- - variable_cube: iris.cube + variable_cube: iris.cube.Cube or xarray.DataArray Input variable cube, containing a vertical coordinate. vertical_coord: str Vertical coordinate name. If None, this function tries to auto-detect. @@ -480,7 +483,7 @@ def find_vertical_axis_from_coord(variable_cube, vertical_coord=None): _warn_auto_coordinate() if isinstance(variable_cube, iris.cube.Cube): - list_coord_names = [coord.name() for coord in variable_cube.coords()] + return iris_utils.find_vertical_axis_from_coord(variable_cube, vertical_coord) elif isinstance(variable_cube, xr.Dataset) or isinstance( variable_cube, xr.DataArray ): diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py new file mode 100644 index 00000000..c0df9885 --- /dev/null +++ b/tobac/utils/internal/xarray_utils.py @@ -0,0 +1,5 @@ +"""Internal tobac utilities for xarray datasets/dataarrays +""" + +from typing import Union +import xarray as xr From e8b8c718c09b5acaf0237bd3dfdbdf6e7b353f14 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 11 Sep 2023 22:11:26 -0500 Subject: [PATCH 03/32] continuing to separate out utilities into iris and xarray --- tobac/utils/internal/general_internal.py | 57 +++++++-------- tobac/utils/internal/iris_utils.py | 91 +++++++++++++++++++++--- tobac/utils/internal/xarray_utils.py | 43 +++++++++++ 3 files changed, 152 insertions(+), 39 deletions(-) diff --git a/tobac/utils/internal/general_internal.py b/tobac/utils/internal/general_internal.py index e9d1d415..ae70bc8a 100644 --- a/tobac/utils/internal/general_internal.py +++ b/tobac/utils/internal/general_internal.py @@ -5,10 +5,20 @@ import xarray as xr import iris import iris.cube +import pandas as pd import warnings from . import iris_utils +from . 
import xarray_utils as xr_utils from typing import Union +# list of common vertical coordinates to search for in various functions +COMMON_VERT_COORDS: list[str] = [ + "z", + "model_level_number", + "altitude", + "geopotential_height", +] + def _warn_auto_coordinate(): """ @@ -472,39 +482,21 @@ def find_vertical_axis_from_coord( ValueError Raised if the vertical coordinate isn't found in the cube. """ - list_vertical = [ - "z", - "model_level_number", - "altitude", - "geopotential_height", - ] if vertical_coord == "auto": _warn_auto_coordinate() if isinstance(variable_cube, iris.cube.Cube): return iris_utils.find_vertical_axis_from_coord(variable_cube, vertical_coord) - elif isinstance(variable_cube, xr.Dataset) or isinstance( - variable_cube, xr.DataArray - ): - list_coord_names = variable_cube.coords + if isinstance(variable_cube, xr.Dataset) or isinstance(variable_cube, xr.DataArray): + return xr_utils.find_vertical_axis_from_coord(variable_cube, vertical_coord) - if vertical_coord is None or vertical_coord == "auto": - # find the intersection - all_vertical_axes = list(set(list_coord_names) & set(list_vertical)) - if len(all_vertical_axes) >= 1: - return all_vertical_axes[0] - else: - raise ValueError( - "Cube lacks suitable automatic vertical coordinate (z, model_level_number, altitude, or geopotential_height)" - ) - elif vertical_coord in list_coord_names: - return vertical_coord - else: - raise ValueError("Please specify vertical coordinate found in cube") + raise ValueError("variable_cube must be xr.DataArray or iris.cube.Cube") -def find_dataframe_vertical_coord(variable_dataframe, vertical_coord=None): +def find_dataframe_vertical_coord( + variable_dataframe: pd.DataFrame, vertical_coord: Union[str, None] = None +): """Function to find the vertical coordinate in the iris cube Parameters @@ -529,8 +521,9 @@ def find_dataframe_vertical_coord(variable_dataframe, vertical_coord=None): _warn_auto_coordinate() if vertical_coord is None or vertical_coord == "auto": - list_vertical = ["z", "model_level_number", "altitude", "geopotential_height"] - all_vertical_axes = list(set(variable_dataframe.columns) & set(list_vertical)) + all_vertical_axes = list( + set(variable_dataframe.columns) & set(COMMON_VERT_COORDS) + ) if len(all_vertical_axes) == 1: return all_vertical_axes[0] else: @@ -544,7 +537,7 @@ def find_dataframe_vertical_coord(variable_dataframe, vertical_coord=None): @njit_if_available -def calc_distance_coords(coords_1, coords_2): +def calc_distance_coords(coords_1: np.array, coords_2: np.array): """Function to calculate the distance between cartesian coordinate set 1 and coordinate set 2. Parameters @@ -571,13 +564,17 @@ def calc_distance_coords(coords_1, coords_2): return np.sqrt(np.sum(deltas**2)) -def find_hdim_axes_3D(field_in, vertical_coord=None, vertical_axis=None): +def find_hdim_axes_3D( + field_in: Union[iris.cube.Cube, xr.DataArray], + vertical_coord: Union[str, None] = None, + vertical_axis: Union[int, None] = None, +): """Finds what the hdim axes are given a 3D (including z) or 4D (including z and time) dataset. 
Parameters ---------- - field_in: iris cube or xarray dataset + field_in: iris cube or xarray dataarray Input field, can be 3D or 4D vertical_coord: str The name of the vertical coord, or None, which will attempt to find @@ -602,7 +599,7 @@ def find_hdim_axes_3D(field_in, vertical_coord=None, vertical_axis=None): raise ValueError("Cannot set both vertical_coord and vertical_axis.") if type(field_in) is iris_cube.Cube: - return find_hdim_axes_3D_iris(field_in, vertical_coord, vertical_axis) + return iris_utils.find_hdim_axes_3d(field_in, vertical_coord, vertical_axis) elif type(field_in) is xr.DataArray: raise NotImplementedError("Xarray find_hdim_axes_3D not implemented") else: diff --git a/tobac/utils/internal/iris_utils.py b/tobac/utils/internal/iris_utils.py index f62d8729..a31b0943 100644 --- a/tobac/utils/internal/iris_utils.py +++ b/tobac/utils/internal/iris_utils.py @@ -3,11 +3,17 @@ """ from typing import Union + import iris import iris.cube +import numpy as np + +from . import general_internal as tb_utils_gi -def find_axis_from_coord(variable_cube: iris.cube.Cube, coord_name: str): +def find_axis_from_coord( + variable_cube: iris.cube.Cube, coord_name: str +) -> Union[int, None]: """Finds the axis number in an iris cube given a coordinate name. Parameters @@ -39,7 +45,7 @@ def find_axis_from_coord(variable_cube: iris.cube.Cube, coord_name: str): def find_vertical_axis_from_coord( variable_cube: iris.cube.Cube, vertical_coord: Union[str, None] = None -): +) -> str: """Function to find the vertical coordinate in the iris cube Parameters @@ -59,18 +65,14 @@ def find_vertical_axis_from_coord( ValueError Raised if the vertical coordinate isn't found in the cube. """ - list_vertical = [ - "z", - "model_level_number", - "altitude", - "geopotential_height", - ] list_coord_names = [coord.name() for coord in variable_cube.coords()] if vertical_coord is None or vertical_coord == "auto": # find the intersection - all_vertical_axes = list(set(list_coord_names) & set(list_vertical)) + all_vertical_axes = list( + set(list_coord_names) & set(tb_utils_gi.COMMON_VERT_COORDS) + ) if len(all_vertical_axes) >= 1: return all_vertical_axes[0] raise ValueError( @@ -80,3 +82,74 @@ def find_vertical_axis_from_coord( if vertical_coord in list_coord_names: return vertical_coord raise ValueError("Please specify vertical coordinate found in cube") + + +def find_hdim_axes_3d( + field_in: iris.cube.Cube, + vertical_coord: Union[str, None] = None, + vertical_axis: Union[int, None] = None, +) -> tuple[int]: + """Finds what the hdim axes are given a 3D (including z) or + 4D (including z and time) dataset. + + Parameters + ---------- + field_in: iris cube + Input field, can be 3D or 4D + vertical_coord: str or None + The name of the vertical coord, or None, which will attempt to find + the vertical coordinate name + vertical_axis: int or None + The axis number of the vertical coordinate, or None. Note + that only one of vertical_axis or vertical_coord can be set. 
+ + Returns + ------- + (hdim_1_axis, hdim_2_axis): (int, int) + The axes for hdim_1 and hdim_2 + """ + + if vertical_coord is not None and vertical_axis is not None: + if vertical_coord != "auto": + raise ValueError("Cannot set both vertical_coord and vertical_axis.") + + time_axis = find_axis_from_coord(field_in, "time") + if vertical_axis is not None: + vertical_coord_axis = vertical_axis + vert_coord_found = True + else: + try: + vertical_axis = find_vertical_axis_from_coord( + field_in, vertical_coord=vertical_coord + ) + except ValueError: + vert_coord_found = False + else: + vert_coord_found = True + ndim_vertical = field_in.coord_dims(vertical_axis) + if len(ndim_vertical) > 1: + raise ValueError( + "please specify 1 dimensional vertical coordinate." + f" Current vertical coordinates: {ndim_vertical}" + ) + if len(ndim_vertical) != 0: + vertical_coord_axis = ndim_vertical[0] + else: + # this means the vertical coordinate is an auxiliary coordinate of some kind. + vert_coord_found = False + + if not vert_coord_found: + # if we don't have a vertical coordinate, and we are 3D or lower + # that is okay. + if (field_in.ndim == 3 and time_axis is not None) or field_in.ndim < 3: + vertical_coord_axis = None + else: + raise ValueError("No suitable vertical coordinate found") + # Once we know the vertical coordinate, we can resolve the + # horizontal coordinates + + all_axes = np.arange(0, field_in.ndim) + output_vals = tuple( + all_axes[np.logical_not(np.isin(all_axes, [time_axis, vertical_coord_axis]))] + ) + return output_vals diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index c0df9885..31579ef2 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -3,3 +3,46 @@ from typing import Union import xarray as xr +from . import general_internal as tb_utils_gi + + +def find_vertical_axis_from_coord( + variable_cube: xr.DataArray, + vertical_coord: Union[str, None] = None, +): + """Function to find the vertical coordinate in the iris cube + + Parameters + ---------- + variable_cube: iris.cube.Cube or xarray.DataArray + Input variable cube, containing a vertical coordinate. + vertical_coord: str + Vertical coordinate name. If None, this function tries to auto-detect. + + Returns + ------- + str + the vertical coordinate name + + Raises + ------ + ValueError + Raised if the vertical coordinate isn't found in the cube. 
+ """ + + list_coord_names = variable_cube.coords + + if vertical_coord is None or vertical_coord == "auto": + # find the intersection + all_vertical_axes = list( + set(list_coord_names) & set(tb_utils_gi.COMMON_VERT_COORDS) + ) + if len(all_vertical_axes) >= 1: + return all_vertical_axes[0] + raise ValueError( + "Cube lacks suitable automatic vertical coordinate (z, model_level_number, " + "altitude, or geopotential_height)" + ) + if vertical_coord in list_coord_names: + return vertical_coord + raise ValueError("Please specify vertical coordinate found in cube") From a480730eced28c7ed924e799c1709c1691e8b0e4 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 13 Sep 2023 11:22:06 -0500 Subject: [PATCH 04/32] ensure py3.8 compatibility, ensure output types work --- tobac/utils/internal/general_internal.py | 109 +++++------------------ tobac/utils/internal/iris_utils.py | 1 + tobac/utils/internal/xarray_utils.py | 4 +- 3 files changed, 26 insertions(+), 88 deletions(-) diff --git a/tobac/utils/internal/general_internal.py b/tobac/utils/internal/general_internal.py index ae70bc8a..271cccaa 100644 --- a/tobac/utils/internal/general_internal.py +++ b/tobac/utils/internal/general_internal.py @@ -1,5 +1,7 @@ """Internal tobac utilities """ +from __future__ import annotations + import numpy as np import skimage.measure import xarray as xr @@ -9,7 +11,7 @@ import warnings from . import iris_utils from . import xarray_utils as xr_utils -from typing import Union +from typing import Union, Callable # list of common vertical coordinates to search for in various functions COMMON_VERT_COORDS: list[str] = [ @@ -30,7 +32,7 @@ def _warn_auto_coordinate(): ) -def get_label_props_in_dict(labels): +def get_label_props_in_dict(labels: np.array) -> dict: """Function to get the label properties into a dictionary format. Parameters @@ -53,7 +55,7 @@ def get_label_props_in_dict(labels): return region_properties_dict -def get_indices_of_labels_from_reg_prop_dict(region_property_dict): +def get_indices_of_labels_from_reg_prop_dict(region_property_dict: dict) -> tuple[dict]: """Function to get the x, y, and z indices (as well as point count) of all labeled regions. Parameters ---------- @@ -107,7 +109,7 @@ def get_indices_of_labels_from_reg_prop_dict(region_property_dict): return [curr_loc_indices, y_indices, x_indices] -def iris_to_xarray(func): +def iris_to_xarray(func: Callable) -> Callable: """Decorator that converts all input of a function that is in the form of Iris cubes into xarray DataArrays and converts all outputs with type xarray DataArrays back into Iris cubes. @@ -173,7 +175,7 @@ def wrapper(*args, **kwargs): return wrapper -def xarray_to_iris(func): +def xarray_to_iris(func: Callable) -> Callable: """Decorator that converts all input of a function that is in the form of xarray DataArrays into Iris cubes and converts all outputs with type Iris cubes back into xarray DataArrays. 
@@ -257,7 +259,7 @@ def wrapper(*args, **kwargs): return wrapper -def irispandas_to_xarray(func): +def irispandas_to_xarray(func: Callable) -> Callable: """Decorator that converts all input of a function that is in the form of Iris cubes/pandas Dataframes into xarray DataArrays/xarray Datasets and converts all outputs with the type xarray DataArray/xarray Dataset @@ -337,7 +339,7 @@ def wrapper(*args, **kwargs): return wrapper -def xarray_to_irispandas(func): +def xarray_to_irispandas(func: Callable) -> Callable: """Decorator that converts all input of a function that is in the form of DataArrays/xarray Datasets into xarray Iris cubes/pandas Dataframes and converts all outputs with the type Iris cubes/pandas Dataframes back into @@ -440,7 +442,7 @@ def wrapper(*args, **kwargs): return wrapper -def njit_if_available(func, **kwargs): +def njit_if_available(func: Callable, **kwargs) -> Callable: """Decorator to wrap a function with numba.njit if available. If numba isn't available, it just returns the function. @@ -462,7 +464,7 @@ def njit_if_available(func, **kwargs): def find_vertical_axis_from_coord( variable_cube: Union[iris.cube.Cube, xr.DataArray], vertical_coord: Union[str, None] = None, -): +) -> str: """Function to find the vertical coordinate in the iris cube Parameters @@ -496,7 +498,7 @@ def find_vertical_axis_from_coord( def find_dataframe_vertical_coord( variable_dataframe: pd.DataFrame, vertical_coord: Union[str, None] = None -): +) -> str: """Function to find the vertical coordinate in the iris cube Parameters @@ -537,7 +539,7 @@ def find_dataframe_vertical_coord( @njit_if_available -def calc_distance_coords(coords_1: np.array, coords_2: np.array): +def calc_distance_coords(coords_1: np.array, coords_2: np.array) -> float: """Function to calculate the distance between cartesian coordinate set 1 and coordinate set 2. Parameters @@ -568,7 +570,7 @@ def find_hdim_axes_3D( field_in: Union[iris.cube.Cube, xr.DataArray], vertical_coord: Union[str, None] = None, vertical_axis: Union[int, None] = None, -): +) -> tuple[int]: """Finds what the hdim axes are given a 3D (including z) or 4D (including z and time) dataset. @@ -589,7 +591,6 @@ def find_hdim_axes_3D( The axes for hdim_1 and hdim_2 """ - from iris import cube as iris_cube if vertical_coord == "auto": _warn_auto_coordinate() @@ -598,7 +599,7 @@ def find_hdim_axes_3D( if vertical_coord != "auto": raise ValueError("Cannot set both vertical_coord and vertical_axis.") - if type(field_in) is iris_cube.Cube: + if type(field_in) is iris.cube.Cube: return iris_utils.find_hdim_axes_3d(field_in, vertical_coord, vertical_axis) elif type(field_in) is xr.DataArray: raise NotImplementedError("Xarray find_hdim_axes_3D not implemented") @@ -608,7 +609,7 @@ def find_hdim_axes_3D( def find_axis_from_coord( variable_arr: Union[iris.cube.Cube, xr.DataArray], coord_name: str -): +) -> int: """Finds the axis number in an xarray or iris cube given a coordinate or dimension name. Parameters @@ -635,83 +636,17 @@ def find_axis_from_coord( raise ValueError("variable_arr must be Iris Cube or Xarray DataArray") -def find_hdim_axes_3D_iris(field_in, vertical_coord=None, vertical_axis=None): - """Finds what the hdim axes are given a 3D (including z) or - 4D (including z and time) dataset. 
- - Parameters - ---------- - field_in: iris cube - Input field, can be 3D or 4D - vertical_coord: str or None - The name of the vertical coord, or None, which will attempt to find - the vertical coordinate name - vertical_axis: int or None - The axis number of the vertical coordinate, or None. Note - that only one of vertical_axis or vertical_coord can be set. - - Returns - ------- - (hdim_1_axis, hdim_2_axis): (int, int) - The axes for hdim_1 and hdim_2 - """ - - if vertical_coord == "auto": - _warn_auto_coordinate() - - if vertical_coord is not None and vertical_axis is not None: - if vertical_coord != "auto": - raise ValueError("Cannot set both vertical_coord and vertical_axis.") - - time_axis = iris_utils.find_axis_from_coord(field_in, "time") - if vertical_axis is not None: - vertical_coord_axis = vertical_axis - vert_coord_found = True - else: - try: - vertical_axis = find_vertical_axis_from_coord( - field_in, vertical_coord=vertical_coord - ) - except ValueError: - vert_coord_found = False - else: - vert_coord_found = True - ndim_vertical = field_in.coord_dims(vertical_axis) - if len(ndim_vertical) > 1: - raise ValueError( - "please specify 1 dimensional vertical coordinate." - " Current vertical coordinates: {0}".format(ndim_vertical) - ) - if len(ndim_vertical) != 0: - vertical_coord_axis = ndim_vertical[0] - else: - # this means the vertical coordinate is an auxiliary coordinate of some kind. - vert_coord_found = False - - if not vert_coord_found: - # if we don't have a vertical coordinate, and we are 3D or lower - # that is okay. - if (field_in.ndim == 3 and time_axis is not None) or field_in.ndim < 3: - vertical_coord_axis = None - else: - raise ValueError("No suitable vertical coordinate found") - # Once we know the vertical coordinate, we can resolve the - # horizontal coordinates - - all_axes = np.arange(0, field_in.ndim) - output_vals = tuple( - all_axes[np.logical_not(np.isin(all_axes, [time_axis, vertical_coord_axis]))] - ) - return output_vals - - @irispandas_to_xarray -def detect_latlon_coord_name(in_dataset, latitude_name=None, longitude_name=None): +def detect_latlon_coord_name( + in_dataset: Union[xr.DataArray, iris.cube.Cube], + latitude_name: Union[str, None] = None, + longitude_name: Union[str, None] = None, +) -> tuple[str]: """Function to detect the name of latitude/longitude coordinates Parameters ---------- - in_dataset: iris.cube.Cube, xarray.Dataset, or xarray.Dataarray + in_dataset: iris.cube.Cube or xarray.DataArray Input dataset to detect names from latitude_name: str The name of the latitude coordinate. If None, tries to auto-detect. 
diff --git a/tobac/utils/internal/iris_utils.py b/tobac/utils/internal/iris_utils.py index a31b0943..029061cb 100644 --- a/tobac/utils/internal/iris_utils.py +++ b/tobac/utils/internal/iris_utils.py @@ -1,6 +1,7 @@ """Internal tobac utilities for iris cubes The goal will be to, ultimately, remove these when we sunset iris """ +from __future__ import annotations from typing import Union diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 31579ef2..641be123 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -1,5 +1,7 @@ """Internal tobac utilities for xarray datasets/dataarrays """ +from __future__ import annotations + from typing import Union import xarray as xr @@ -9,7 +11,7 @@ def find_vertical_axis_from_coord( variable_cube: xr.DataArray, vertical_coord: Union[str, None] = None, -): +) -> str: """Function to find the vertical coordinate in the iris cube Parameters From 3c7404e70da2bb900ebbf0caeeb614164e40b092 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 19 Sep 2023 13:33:51 -0700 Subject: [PATCH 05/32] merging in new changes --- tobac/feature_detection.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 7c2b9351..ca51fe1d 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -269,8 +269,11 @@ def test_overlap( def remove_parents( - features_thresholds: pd.DataFrame, regions_i: dict, regions_old: dict, strict_thresholding: bool=False -)->pd.DataFrame: + features_thresholds: pd.DataFrame, + regions_i: dict, + regions_old: dict, + strict_thresholding: bool = False, +) -> pd.DataFrame: """Remove parents of newly detected feature regions. Remove features where its regions surround newly From 94d6da3468d065ca28ec5da92e37fed94b0c30f5 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 20 Sep 2023 16:39:50 -0700 Subject: [PATCH 06/32] Updates to xarray support in utilities. 
--- tobac/testing.py | 28 +++- tobac/tests/test_utils_internal.py | 5 + tobac/utils/internal/general_internal.py | 12 +- tobac/utils/internal/xarray_utils.py | 198 ++++++++++++++++++++++- 4 files changed, 230 insertions(+), 13 deletions(-) diff --git a/tobac/testing.py b/tobac/testing.py index 6eb38d50..33277abf 100644 --- a/tobac/testing.py +++ b/tobac/testing.py @@ -520,6 +520,7 @@ def make_dataset_from_arr( import xarray as xr import iris + time_dim_name = "time" has_time = time_dim_num is not None is_3D = z_dim_num is not None @@ -530,10 +531,29 @@ def make_dataset_from_arr( if has_time: time_min = datetime.datetime(2022, 1, 1) time_num = in_arr.shape[time_dim_num] + time_vals = pd.date_range(start=time_min, periods=time_num).values.astype( + "datetime64[s]" + ) if data_type == "xarray": + # add dimension and coordinates + if is_3D: + output_arr = output_arr.rename( + new_name_or_name_dict={"dim_" + str(z_dim_num): z_dim_name} + ) + output_arr = output_arr.assign_coords( + {z_dim_name: (z_dim_name, np.arange(0, z_max))} + ) + # add dimension and coordinates + if has_time: + output_arr = output_arr.rename( + new_name_or_name_dict={"dim_" + str(time_dim_num): time_dim_name} + ) + output_arr = output_arr.assign_coords( + {time_dim_name: (time_dim_name, time_vals)} + ) return output_arr - elif data_type == "iris": + if data_type == "iris": out_arr_iris = output_arr.to_iris() if is_3D: @@ -544,10 +564,8 @@ def make_dataset_from_arr( if has_time: out_arr_iris.add_dim_coord( iris.coords.DimCoord( - pd.date_range(start=time_min, periods=time_num) - .values.astype("datetime64[s]") - .astype(int), - standard_name="time", + time_vals.astype(int), + standard_name=time_dim_name, units="seconds since epoch", ), time_dim_num, diff --git a/tobac/tests/test_utils_internal.py b/tobac/tests/test_utils_internal.py index 0dd03cb0..9d34e9d3 100644 --- a/tobac/tests/test_utils_internal.py +++ b/tobac/tests/test_utils_internal.py @@ -16,6 +16,11 @@ ("iris", 3, 0, (1, 2)), ("iris", 0, 3, (1, 2)), ("iris", 1, 2, (0, 3)), + ("xarray", 0, 1, (2, 3)), + ("xarray", 0, 2, (1, 3)), + ("xarray", 3, 0, (1, 2)), + ("xarray", 0, 3, (1, 2)), + ("xarray", 1, 2, (0, 3)), ], ) def test_find_hdim_axes_3D(dset_type, time_axis, vertical_axis, expected_out): diff --git a/tobac/utils/internal/general_internal.py b/tobac/utils/internal/general_internal.py index a9d3df3c..1103136f 100644 --- a/tobac/utils/internal/general_internal.py +++ b/tobac/utils/internal/general_internal.py @@ -473,6 +473,8 @@ def find_vertical_axis_from_coord( ) -> str: """Function to find the vertical coordinate in the iris cube + TODO: this function should be renamed + Parameters ---------- variable_cube: iris.cube.Cube or xarray.DataArray @@ -496,8 +498,8 @@ def find_vertical_axis_from_coord( if isinstance(variable_cube, iris.cube.Cube): return iris_utils.find_vertical_axis_from_coord(variable_cube, vertical_coord) - if isinstance(variable_cube, xr.Dataset) or isinstance(variable_cube, xr.DataArray): - return xr_utils.find_vertical_axis_from_coord(variable_cube, vertical_coord) + if isinstance(variable_cube, xr.DataArray): + return xr_utils.find_vertical_coord_name(variable_cube, vertical_coord) raise ValueError("variable_cube must be xr.DataArray or iris.cube.Cube") @@ -608,7 +610,7 @@ def find_hdim_axes_3D( if type(field_in) is iris.cube.Cube: return iris_utils.find_hdim_axes_3d(field_in, vertical_coord, vertical_axis) elif type(field_in) is xr.DataArray: - raise NotImplementedError("Xarray find_hdim_axes_3D not implemented") + return 
xr_utils.find_hdim_axes_3d(field_in, vertical_coord, vertical_axis) else: raise ValueError("Unknown data type: " + type(field_in).__name__) @@ -635,9 +637,7 @@ def find_axis_from_coord( if isinstance(variable_arr, iris.cube.Cube): return iris_utils.find_axis_from_coord(variable_arr, coord_name) elif isinstance(variable_arr, xr.DataArray): - raise NotImplementedError( - "xarray version of find_axis_from_coord not implemented." - ) + return xr_utils.find_axis_from_dim_coord(variable_arr, coord_name) else: raise ValueError("variable_arr must be Iris Cube or Xarray DataArray") diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 641be123..69199515 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -4,11 +4,134 @@ from typing import Union +import numpy as np import xarray as xr from . import general_internal as tb_utils_gi -def find_vertical_axis_from_coord( +def find_axis_from_dim_coord( + in_da: xr.DataArray, dim_coord_name: str +) -> Union[int, None]: + """Finds the axis number in an xarray dataarray given a coordinate or + dimension name. + + Parameters + ---------- + in_da: xarray.DataArray + Input variable array + dim_coord_name: str + coordinate or dimension to look for + + Returns + ------- + axis_number: int + the number of the axis of the given coordinate, or None if the coordinate + is not found in the cube or not a dimensional coordinate + """ + + try: + dim_axis = find_axis_from_dim(in_da, dim_coord_name) + except ValueError: + dim_axis = None + + try: + coord_axes = find_axis_from_coord(in_da, dim_coord_name) + except ValueError: + coord_axes = tuple() + + if dim_axis is None and len(coord_axes) == 0: + raise ValueError("Coordinate/Dimension " + dim_coord_name + " not found.") + + # if we find a dimension with an axis and/or the coordinates, return that axis number + if dim_axis == coord_axes[0] and len(coord_axes) == 1: + return dim_axis + if len(coord_axes) == 0 and dim_axis is not None: + return dim_axis + if dim_axis is None and len(coord_axes) == 1: + return coord_axes[0] + # odd case- we have a coordinate and a dimension with the same name, but the + # coordinate has multiple dimensions, while the dimension is clearly only one axis + # pass along the dimension one as long as it is represented in the + # list of coordinate dimensions. 
+ if len(coord_axes) > 1 and dim_axis is not None and dim_axis in coord_axes: + return dim_axis + + if dim_axis is not None and len(coord_axes) > 1 and dim_axis not in coord_axes: + raise ValueError("Dimension and coordinate share a name but not an axis.") + + return None + + +def find_axis_from_dim(in_da: xr.DataArray, dim_name: str) -> Union[int, None]: + """ + Finds the axis number from a DataArray dimension name + + Parameters + ---------- + in_da: xarray.DataArray + Input DataArray to find the axis number from + dim_name: str + Name of the dimension + + Returns + ------- + int or None + axis number or None if axis isn't found + + Raises + ------ + ValueError + raises ValueError if dim_name matches multiple dimensions + """ + list_dims = in_da.dims + all_matching_dims = list(set(list_dims) & {dim_name}) + if len(all_matching_dims) == 1: + return list_dims.index(all_matching_dims[0]) + if len(all_matching_dims) > 1: + raise ValueError("Too many matching dimensions") + return None + + +def find_axis_from_coord(in_da: xr.DataArray, coord_name: str) -> tuple[int]: + """ + Finds the axis number from a DataArray coordinate name + + Parameters + ---------- + in_da: xarray.DataArray + Input DataArray to find the axis number from + coord_name: str + Name of the coordinate + + Returns + ------- + tuple of int + axis number(s) + + Raises + ------ + ValueError + raises ValueError if the coordinate has more than 1 axis or 0 axes; or if + there are >1 matching coordinate of that name + """ + list_coords = in_da.coords + all_matching_coords = list(set(list_coords) & {coord_name}) + if len(all_matching_coords) == 1: + curr_coord = list_coords[all_matching_coords[0]] + return tuple( + ( + find_axis_from_dim(in_da, x) + for x in curr_coord.dims + if find_axis_from_dim(in_da, x) is not None + ) + ) + + if len(all_matching_coords) > 1: + raise ValueError("Too many matching coords") + return tuple() + + +def find_vertical_coord_name( variable_cube: xr.DataArray, vertical_coord: Union[str, None] = None, ) -> str: @@ -16,7 +139,7 @@ def find_vertical_axis_from_coord( Parameters ---------- - variable_cube: iris.cube.Cube or xarray.DataArray + variable_cube: xarray.DataArray Input variable cube, containing a vertical coordinate. vertical_coord: str Vertical coordinate name. If None, this function tries to auto-detect. @@ -48,3 +171,74 @@ def find_vertical_axis_from_coord( if vertical_coord in list_coord_names: return vertical_coord raise ValueError("Please specify vertical coordinate found in cube") + + +def find_hdim_axes_3d( + field_in: xr.DataArray, + vertical_coord: Union[str, None] = None, + vertical_axis: Union[int, None] = None, + time_dim_coord_name: str = "time", +) -> tuple[int]: + """Finds what the hdim axes are given a 3D (including z) or + 4D (including z and time) dataset. + + Parameters + ---------- + field_in: xarray.DataArray + Input field, can be 3D or 4D + vertical_coord: str or None + The name of the vertical coord, or None, which will attempt to find + the vertical coordinate name + vertical_axis: int or None + The axis number of the vertical coordinate, or None. Note + that only one of vertical_axis or vertical_coord can be set. 
+ time_dim_coord_name: str + Name of the time dimension/coordinate + + Returns + ------- + (hdim_1_axis, hdim_2_axis): (int, int) + The axes for hdim_1 and hdim_2 + """ + + if vertical_coord is not None and vertical_axis is not None: + if vertical_coord != "auto": + raise ValueError("Cannot set both vertical_coord and vertical_axis.") + + time_axis = find_axis_from_dim_coord(field_in, time_dim_coord_name) + # we have already specified the axis. + if vertical_axis is not None: + vertical_coord_axis = vertical_axis + vert_coord_found = True + else: + try: + vertical_coord_name = find_vertical_coord_name( + field_in, vertical_coord=vertical_coord + ) + except ValueError: + vert_coord_found = False + else: + vert_coord_found = True + ndim_vertical = find_axis_from_dim_coord(field_in, vertical_coord_name) + if ndim_vertical is None: + raise ValueError( + "please specify 1 dimensional vertical coordinate." + f" Current vertical coordinates: {ndim_vertical}" + ) + vertical_coord_axis = ndim_vertical + + if not vert_coord_found: + # if we don't have a vertical coordinate, and we are 3D or lower + # that is okay. + if (field_in.ndim == 3 and time_axis is not None) or field_in.ndim < 3: + vertical_coord_axis = None + else: + raise ValueError("No suitable vertical coordinate found") + # Once we know the vertical coordinate, we can resolve the + # horizontal coordinates + + all_axes = np.arange(0, field_in.ndim) + output_vals = tuple( + all_axes[np.logical_not(np.isin(all_axes, [time_axis, vertical_coord_axis]))] + ) + return output_vals From a3b5751cc6577fa0c861a1c1f045501c1465c148 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 21 Sep 2023 08:18:11 -0700 Subject: [PATCH 07/32] Added additional tests to new xarray utilities --- tobac/tests/test_xarray_utils.py | 144 +++++++++++++++++++++++++++ tobac/utils/internal/xarray_utils.py | 40 ++++---- 2 files changed, 166 insertions(+), 18 deletions(-) create mode 100644 tobac/tests/test_xarray_utils.py diff --git a/tobac/tests/test_xarray_utils.py b/tobac/tests/test_xarray_utils.py new file mode 100644 index 00000000..643058f6 --- /dev/null +++ b/tobac/tests/test_xarray_utils.py @@ -0,0 +1,144 @@ +"""Tests for tobac.utils.internal_utils.xarray_utils +""" + + +import tobac.utils.internal as internal_utils +import tobac.utils.internal.xarray_utils as xr_utils +import tobac.testing as tbtest + +from typing import Union + +import pytest +import numpy as np +import xarray as xr + + +@pytest.mark.parametrize( + "dim_names, coord_dim_map, coord_looking_for, expected_out, expected_raise", + [ + ( + ("time", "altitude", "x", "y"), # dim_names + { # coord_dim_map + "time": ("time",), + "latitude": ("x", "y"), + "longitude": ("x", "y"), + "altmsl": ("altitude", "x", "y"), + }, + "time", # coord_looking_for + 0, + False, + ), + ( + ("time", "time", "time", "time", "time"), # dim_names + { # coord_dim_map + "time": ("time",), + }, + "time", # coord_looking_for + 0, + True, + ), + ( + ("time", "altitude", "x", "y"), # dim_names + { # coord_dim_map + "time": ("time",), + "latitude": ("x", "y"), + "longitude": ("x", "y"), + "altmsl": ("altitude", "x", "y"), + }, + "altitude", # coord_looking_for + 1, + False, + ), + ( + ("time", "altitude", "x", "y"), # dim_names + { # coord_dim_map + "time": ("time",), + "latitude": ("x", "y"), + "longitude": ("x", "y"), + "altmsl": ("altitude", "x", "y"), + }, + "latitude", # coord_looking_for + None, + False, + ), + ( + ("time", "altitude", "x", "y"), # dim_names + { # coord_dim_map + "time": ("time",), + "latitude": ("x", "y"), 
+ "longitude": ("x", "y"), + "altmsl": ("altitude", "x", "y"), + }, + "x", # coord_looking_for + 2, + False, + ), + ( + ("time", "altitude", "x", "y"), # dim_names + { # coord_dim_map + "time": ("time",), + "latitude": ("x", "y"), + "longitude": ("x", "y"), + "altmsl": ("altitude", "x", "y"), + }, + "z", # coord_looking_for + 2, + True, + ), + ( + ("time", "altitude", "x", "y"), # dim_names + { # coord_dim_map + "t": ("time",), + "latitude": ("x", "y"), + "longitude": ("x", "y"), + "altmsl": ("altitude", "x", "y"), + }, + "t", # coord_looking_for + 0, + False, + ), + ], +) +def test_find_axis_from_dim_coord( + dim_names: tuple[str], + coord_dim_map: dict[str : tuple[str],], + coord_looking_for: str, + expected_out: Union[int, None], + expected_raise: bool, +): + """Tests tobac.utils.internal.file_hdim_axes_3D + + Parameters + ---------- + dim_names: tuple[str] + Names of the dimensions to have + coord_dim_map: dict[str : tuple[str],] + Mapping of coordinates (keys) to dimensions (values) + coord_looking_for: str + what coordinate/dimension to look for + expected_out: Union[int, None] + What the expected output is + expected_raise: bool + Whether or not we expect a raise + """ + + # size of the array per dimension + arr_sz = 4 + arr_da = np.empty((arr_sz,) * len(dim_names)) + coord_vals = {} + for coord_nm in coord_dim_map: + coord_vals[coord_nm] = ( + coord_dim_map[coord_nm], + np.empty((arr_sz,) * len(coord_dim_map[coord_nm])), + ) + + xr_da = xr.DataArray(arr_da, dims=dim_names, coords=coord_vals) + if expected_raise: + with pytest.raises(ValueError): + _ = xr_utils.find_axis_from_dim_coord(xr_da, coord_looking_for) + else: + out_val = xr_utils.find_axis_from_dim_coord(xr_da, coord_looking_for) + if expected_out is not None: + assert out_val == expected_out + else: + assert out_val is None diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 69199515..1396619f 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -27,12 +27,15 @@ def find_axis_from_dim_coord( axis_number: int the number of the axis of the given coordinate, or None if the coordinate is not found in the cube or not a dimensional coordinate + + Raises + ------ + ValueError + Returns ValueError if there are more than one matching dimension name or + if the dimension/coordinate isn't found. """ - try: - dim_axis = find_axis_from_dim(in_da, dim_coord_name) - except ValueError: - dim_axis = None + dim_axis = find_axis_from_dim(in_da, dim_coord_name) try: coord_axes = find_axis_from_coord(in_da, dim_coord_name) @@ -43,21 +46,12 @@ def find_axis_from_dim_coord( raise ValueError("Coordinate/Dimension " + dim_coord_name + " not found.") # if we find a dimension with an axis and/or the coordinates, return that axis number - if dim_axis == coord_axes[0] and len(coord_axes) == 1: + if len(coord_axes) == 1 and dim_axis == coord_axes[0]: return dim_axis if len(coord_axes) == 0 and dim_axis is not None: return dim_axis if dim_axis is None and len(coord_axes) == 1: return coord_axes[0] - # odd case- we have a coordinate and a dimension with the same name, but the - # coordinate has multiple dimensions, while the dimension is clearly only one axis - # pass along the dimension one as long as it is represented in the - # list of coordinate dimensions. 
- if len(coord_axes) > 1 and dim_axis is not None and dim_axis in coord_axes: - return dim_axis - - if dim_axis is not None and len(coord_axes) > 1 and dim_axis not in coord_axes: - raise ValueError("Dimension and coordinate share a name but not an axis.") return None @@ -84,11 +78,21 @@ def find_axis_from_dim(in_da: xr.DataArray, dim_name: str) -> Union[int, None]: raises ValueError if dim_name matches multiple dimensions """ list_dims = in_da.dims - all_matching_dims = list(set(list_dims) & {dim_name}) + all_matching_dims = [ + dim + for dim in list_dims + if dim + in [ + dim_name, + ] + ] if len(all_matching_dims) == 1: return list_dims.index(all_matching_dims[0]) if len(all_matching_dims) > 1: - raise ValueError("Too many matching dimensions") + raise ValueError( + "More than one matching dimension. Need to specify which axis number or rename " + "your dimensions." + ) return None @@ -164,9 +168,9 @@ def find_vertical_coord_name( ) if len(all_vertical_axes) >= 1: return all_vertical_axes[0] + coord_names_err = str(tuple(tb_utils_gi.COMMON_VERT_COORDS)) raise ValueError( - "Cube lacks suitable automatic vertical coordinate (z, model_level_number, " - "altitude, or geopotential_height)" + "Cube lacks suitable automatic vertical coordinate " + coord_names_err ) if vertical_coord in list_coord_names: return vertical_coord From 76690d3beaac690e5500abb8af598d49fe6953ef Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 21 Sep 2023 08:38:28 -0700 Subject: [PATCH 08/32] switch feature_detection_multithreshold_timestep to xarray --- tobac/feature_detection.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index ca51fe1d..5be3cd3f 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -32,6 +32,7 @@ # from typing_extensions import Literal import iris +import iris.cube import xarray as xr @@ -880,8 +881,9 @@ def feature_detection_threshold( return features_threshold, regions +@internal_utils.irispandas_to_xarray def feature_detection_multithreshold_timestep( - data_i: np.array, + data_i: xr.DataArray, i_time: int, threshold: list[float] = None, min_num: int = 0, @@ -909,7 +911,7 @@ def feature_detection_multithreshold_timestep( Parameters ---------- - data_i : iris.cube.Cube + data_i : iris.cube.Cube or xarray.DataArray 3D field to perform the feature detection (single timestep) on. 
i_time : int @@ -985,7 +987,7 @@ def feature_detection_multithreshold_timestep( ) # get actual numpy array and make a copy so as not to change the data in the iris cube - track_data = data_i.core_data().copy() + track_data = data_i.values.copy() track_data = gaussian_filter( track_data, sigma=sigma_threshold From 110a205c1bebc0f5f348853e8212288fa80d6bfb Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 21 Sep 2023 09:56:08 -0700 Subject: [PATCH 09/32] first step of switch feature_detection_multithreshold to xarray --- tobac/feature_detection.py | 33 ++- tobac/utils/general.py | 271 +++--------------------- tobac/utils/internal/iris_utils.py | 305 +++++++++++++++++++++++++++ tobac/utils/internal/xarray_utils.py | 6 +- 4 files changed, 347 insertions(+), 268 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 5be3cd3f..490efcb2 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -1100,8 +1100,9 @@ def feature_detection_multithreshold_timestep( return features_thresholds +@internal_utils.irispandas_to_xarray def feature_detection_multithreshold( - field_in: iris.cube.Cube, + field_in: xr.DataArray, dxy: float = None, threshold: list[float] = None, min_num: int = 0, @@ -1128,8 +1129,8 @@ def feature_detection_multithreshold( Parameters ---------- - field_in : iris.cube.Cube - 2D field to perform the tracking on (needs to have coordinate + field_in : iris.cube.Cube or xarray.DataArray + 2D or 3D field to perform the tracking on (needs to have coordinate 'time' along one of its dimensions), dxy : float @@ -1216,11 +1217,14 @@ def feature_detection_multithreshold( """ from .utils import add_coordinates, add_coordinates_3D + time_var_name: str = "time" logging.debug("start feature detection based on thresholds") - if "time" not in [coord.name() for coord in field_in.coords()]: + ndim_time = internal_utils.find_axis_from_coord(field_in, time_var_name) + if ndim_time is None: raise ValueError( - "input to feature detection step must include a dimension named 'time'" + "input to feature detection step must include a dimension named " + + time_var_name ) # Check whether we need to run 2D or 3D feature detection @@ -1233,8 +1237,6 @@ def feature_detection_multithreshold( else: raise ValueError("Feature detection only works with 2D or 3D data") - ndim_time = field_in.coord_dims("time")[0] - if detect_subset is not None: raise NotImplementedError("Subsetting feature detection not yet supported.") @@ -1276,9 +1278,6 @@ def feature_detection_multithreshold( # create empty list to store features for all timesteps list_features_timesteps = [] - # loop over timesteps for feature identification: - data_time = field_in.slices_over("time") - # if single threshold is put in as a single value, turn it into a list if type(threshold) in [int, float]: threshold = [threshold] @@ -1317,8 +1316,8 @@ def feature_detection_multithreshold( "given in meter." 
) - for i_time, data_i in enumerate(data_time): - time_i = data_i.coord("time").units.num2date(data_i.coord("time").points[0]) + for i_time, data_i in enumerate(field_in.transpose(time_var_name, ...)): + time_i = data_i[time_var_name].values features_thresholds = feature_detection_multithreshold_timestep( data_i, @@ -1365,9 +1364,7 @@ def feature_detection_multithreshold( list_features_timesteps.append(features_thresholds) - logging.debug( - "Finished feature detection for " + time_i.strftime("%Y-%m-%d_%H:%M:%S") - ) + logging.debug("Finished feature detection for %s", time_i) logging.debug("feature detection: merging DataFrames") # Check if features are detected and then concatenate features from different timesteps into @@ -1380,15 +1377,15 @@ def feature_detection_multithreshold( # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) if "vdim" in features: features = add_coordinates_3D( - features, field_in, vertical_coord=vertical_coord + features, field_in.to_iris(), vertical_coord=vertical_coord ) else: - features = add_coordinates(features, field_in) + features = add_coordinates(features, field_in.to_iris()) else: features = None logging.debug("No features detected") logging.debug("feature detection completed") - return features + return features.to_xarray() def filter_min_distance( diff --git a/tobac/utils/general.py b/tobac/utils/general.py index 7836a9c6..91152bb7 100644 --- a/tobac/utils/general.py +++ b/tobac/utils/general.py @@ -3,7 +3,9 @@ """ import copy import logging +from typing import Union +import iris.cube import pandas as pd from . import internal as internal_utils @@ -15,7 +17,9 @@ import warnings -def add_coordinates(t, variable_cube): +def add_coordinates( + t: pd.DataFrame, variable_cube: Union[xr.DataArray, iris.cube.Cube] +) -> pd.DataFrame: """Add coordinates from the input cube of the feature detection to the trajectories/features. @@ -35,135 +39,21 @@ def add_coordinates(t, variable_cube): Trajectories with added coordinates. 
""" - - from scipy.interpolate import interp1d, interpn - - logging.debug("start adding coordinates from cube") - - # pull time as datetime object and timestr from input data and add it to DataFrame: - t["time"] = None - t["timestr"] = None - - logging.debug("adding time coordinate") - - time_in = variable_cube.coord("time") - time_in_datetime = time_in.units.num2date(time_in.points) - - t["time"] = time_in_datetime[t["frame"]] - t["timestr"] = [ - x.strftime("%Y-%m-%d %H:%M:%S") for x in time_in_datetime[t["frame"]] - ] - - # Get list of all coordinates in input cube except for time (already treated): - coord_names = [coord.name() for coord in variable_cube.coords()] - coord_names.remove("time") - - logging.debug("time coordinate added") - - # chose right dimension for horizontal axis based on time dimension: - ndim_time = variable_cube.coord_dims("time")[0] - if ndim_time == 0: - hdim_1 = 1 - hdim_2 = 2 - elif ndim_time == 1: - hdim_1 = 0 - hdim_2 = 2 - elif ndim_time == 2: - hdim_1 = 0 - hdim_2 = 1 - - # create vectors to use to interpolate from pixels to coordinates - dimvec_1 = np.arange(variable_cube.shape[hdim_1]) - dimvec_2 = np.arange(variable_cube.shape[hdim_2]) - - # loop over coordinates in input data: - for coord in coord_names: - logging.debug("adding coord: " + coord) - # interpolate 2D coordinates: - if variable_cube.coord(coord).ndim == 1: - if variable_cube.coord_dims(coord) == (hdim_1,): - f = interp1d( - dimvec_1, - variable_cube.coord(coord).points, - fill_value="extrapolate", - ) - coordinate_points = f(t["hdim_1"]) - - if variable_cube.coord_dims(coord) == (hdim_2,): - f = interp1d( - dimvec_2, - variable_cube.coord(coord).points, - fill_value="extrapolate", - ) - coordinate_points = f(t["hdim_2"]) - - # interpolate 2D coordinates: - elif variable_cube.coord(coord).ndim == 2: - if variable_cube.coord_dims(coord) == (hdim_1, hdim_2): - points = (dimvec_1, dimvec_2) - values = variable_cube.coord(coord).points - xi = np.column_stack((t["hdim_1"], t["hdim_2"])) - coordinate_points = interpn(points, values, xi) - - if variable_cube.coord_dims(coord) == (hdim_2, hdim_1): - points = (dimvec_2, dimvec_1) - values = variable_cube.coord(coord).points - xi = np.column_stack((t["hdim_2"], t["hdim_1"])) - coordinate_points = interpn(points, values, xi) - - # interpolate 3D coordinates: - # mainly workaround for wrf latitude and longitude (to be fixed in future) - - elif variable_cube.coord(coord).ndim == 3: - if variable_cube.coord_dims(coord) == (ndim_time, hdim_1, hdim_2): - points = (dimvec_1, dimvec_2) - values = variable_cube[0, :, :].coord(coord).points - xi = np.column_stack((t["hdim_1"], t["hdim_2"])) - coordinate_points = interpn(points, values, xi) - - if variable_cube.coord_dims(coord) == (ndim_time, hdim_2, hdim_1): - points = (dimvec_2, dimvec_1) - values = variable_cube[0, :, :].coord(coord).points - xi = np.column_stack((t["hdim_2"], t["hdim_1"])) - coordinate_points = interpn(points, values, xi) - - if variable_cube.coord_dims(coord) == (hdim_1, ndim_time, hdim_2): - points = (dimvec_1, dimvec_2) - values = variable_cube[:, 0, :].coord(coord).points - xi = np.column_stack((t["hdim_1"], t["hdim_2"])) - coordinate_points = interpn(points, values, xi) - - if variable_cube.coord_dims(coord) == (hdim_1, hdim_2, ndim_time): - points = (dimvec_1, dimvec_2) - values = variable_cube[:, :, 0].coord(coord).points - xi = np.column_stack((t["hdim_1"], t["hdim_2"])) - coordinate_points = interpn(points, values, xi) - - if variable_cube.coord_dims(coord) == (hdim_2, ndim_time, 
hdim_1): - points = (dimvec_2, dimvec_1) - values = variable_cube[:, 0, :].coord(coord).points - xi = np.column_stack((t["hdim_2"], t["hdim_1"])) - coordinate_points = interpn(points, values, xi) - - if variable_cube.coord_dims(coord) == (hdim_2, hdim_1, ndim_time): - points = (dimvec_2, dimvec_1) - values = variable_cube[:, :, 0].coord(coord).points - xi = np.column_stack((t["hdim_2"], t["hdim_1"])) - coordinate_points = interpn(points, values, xi) - - # write resulting array or list into DataFrame: - t[coord] = coordinate_points - - logging.debug("added coord: " + coord) - return t + if isinstance(variable_cube, iris.cube.Cube): + return internal_utils.iris_utils.add_coordinates(t, variable_cube) + if isinstance(variable_cube, xr.DataArray): + raise NotImplementedError("add_coordinates not implemented for xarray.") + raise ValueError( + "add_coordinates only supports xarray.DataArray and iris.cube.Cube" + ) def add_coordinates_3D( - t, - variable_cube, - vertical_coord=None, - vertical_axis=None, - assume_coords_fixed_in_time=True, + t: pd.DataFrame, + variable_cube: Union[xr.DataArray, iris.cube.Cube], + vertical_coord: Union[str, int] = None, + vertical_axis: Union[int, None] = None, + assume_coords_fixed_in_time: bool = True, ): """Function adding coordinates from the tracking cube to the trajectories for the 3D case: time, longitude&latitude, x&y dimensions, and altitude @@ -196,129 +86,16 @@ def add_coordinates_3D( pandas DataFrame trajectories with added coordinates """ - from scipy.interpolate import interp2d, interp1d, interpn - - logging.debug("start adding coordinates from cube") - - # pull time as datetime object and timestr from input data and add it to DataFrame: - t["time"] = None - t["timestr"] = None - - logging.debug("adding time coordinate") - - time_in = variable_cube.coord("time") - time_in_datetime = time_in.units.num2date(time_in.points) - - t["time"] = time_in_datetime[t["frame"]] - t["timestr"] = [ - x.strftime("%Y-%m-%d %H:%M:%S") for x in time_in_datetime[t["frame"]] - ] - - # Get list of all coordinates in input cube except for time (already treated): - coord_names = [coord.name() for coord in variable_cube.coords()] - coord_names.remove("time") - - logging.debug("time coordinate added") - - # chose right dimension for horizontal and vertical axes based on time dimension: - ndim_time = variable_cube.coord_dims("time")[0] - - if type(vertical_coord) is int: - ndim_vertical = vertical_coord - vertical_axis = None - else: - vertical_axis = internal_utils.find_vertical_axis_from_coord( - variable_cube, vertical_coord=vertical_coord + if isinstance(variable_cube, iris.cube.Cube): + return internal_utils.iris_utils.add_coordinates_3D( + t, variable_cube, vertical_coord, vertical_axis, assume_coords_fixed_in_time ) - - if vertical_axis is not None: - ndim_vertical = internal_utils.find_axis_from_coord( - variable_cube, vertical_axis - ) - if ndim_vertical is None: - raise ValueError("Vertical Coordinate not found") - - # We need to figure out the axis number of hdim_1 and hdim_2. 
- ndim_hdim_1, ndim_hdim_2 = internal_utils.find_hdim_axes_3D( - variable_cube, vertical_axis=ndim_vertical + if isinstance(variable_cube, xr.DataArray): + raise NotImplementedError("add_coordinates_3D not implemented for xarray.") + raise ValueError( + "add_coordinates_3D only supports xarray.DataArray and iris.cube.Cube" ) - if ndim_hdim_1 is None or ndim_hdim_2 is None: - raise ValueError("Could not find hdim coordinates.") - - # create vectors to use to interpolate from pixels to coordinates - dimvec_1 = np.arange(variable_cube.shape[ndim_vertical]) - dimvec_2 = np.arange(variable_cube.shape[ndim_hdim_1]) - dimvec_3 = np.arange(variable_cube.shape[ndim_hdim_2]) - dimvec_time = np.arange(variable_cube.shape[ndim_time]) - - coord_to_ax = { - ndim_vertical: (dimvec_1, "vdim"), - ndim_time: (dimvec_time, "time"), - ndim_hdim_1: (dimvec_2, "hdim_1"), - ndim_hdim_2: (dimvec_3, "hdim_2"), - } - - # loop over coordinates in input data: - for coord in coord_names: - logging.debug("adding coord: " + coord) - # interpolate 1D coordinates: - var_coord = variable_cube.coord(coord) - if var_coord.ndim == 1: - curr_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] - f = interp1d(curr_dim[0], var_coord.points, fill_value="extrapolate") - coordinate_points = f(t[curr_dim[1]]) - - # interpolate 2D coordinates - elif var_coord.ndim == 2: - first_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]] - second_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] - points = (second_dim[0], first_dim[0]) - values = var_coord.points - xi = np.column_stack((t[second_dim[1]], t[first_dim[1]])) - coordinate_points = interpn(points, values, xi) - - # Deal with the special case where the coordinate is 3D but - # one of the dimensions is time and we assume the coordinates - # don't vary in time. - elif ( - var_coord.ndim == 3 - and ndim_time in variable_cube.coord_dims(coord) - and assume_coords_fixed_in_time - ): - time_pos = variable_cube.coord_dims(coord).index(ndim_time) - hdim1_pos = 0 if time_pos != 0 else 1 - hdim2_pos = 1 if time_pos == 2 else 2 - first_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim2_pos]] - second_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim1_pos]] - points = (second_dim[0], first_dim[0]) - values = var_coord.points - xi = np.column_stack((t[second_dim[1]], t[first_dim[1]])) - coordinate_points = interpn(points, values, xi) - - # interpolate 3D coordinates: - elif var_coord.ndim == 3: - first_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] - second_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]] - third_dim = coord_to_ax[variable_cube.coord_dims(coord)[2]] - coordinate_points = interpn( - [first_dim[0], second_dim[0], third_dim[0]], - var_coord.points, - [ - [a, b, c] - for a, b, c in zip( - t[first_dim[1]], t[second_dim[1]], t[third_dim[1]] - ) - ], - ) - # coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])] - - # write resulting array or list into DataFrame: - t[coord] = coordinate_points - - logging.debug("added coord: " + coord) - return t - def get_bounding_box(x, buffer=1): """Finds the bounding box of a ndarray, i.e. the smallest diff --git a/tobac/utils/internal/iris_utils.py b/tobac/utils/internal/iris_utils.py index 029061cb..53de4d32 100644 --- a/tobac/utils/internal/iris_utils.py +++ b/tobac/utils/internal/iris_utils.py @@ -4,10 +4,12 @@ from __future__ import annotations from typing import Union +import logging import iris import iris.cube import numpy as np +import pandas as pd from . 
import general_internal as tb_utils_gi @@ -154,3 +156,306 @@ def find_hdim_axes_3d( all_axes[np.logical_not(np.isin(all_axes, [time_axis, vertical_coord_axis]))] ) return output_vals + + +def add_coordinates(t: pd.DataFrame, variable_cube: iris.cube.Cube) -> pd.DataFrame: + """Add coordinates from the input cube of the feature detection + to the trajectories/features. + + Parameters + ---------- + t : pandas.DataFrame + Trajectories/features from feature detection or linking step. + + variable_cube : iris.cube.Cube + Input data used for the tracking with coordinate information + to transfer to the resulting DataFrame. Needs to contain the + coordinate 'time'. + + Returns + ------- + t : pandas.DataFrame + Trajectories with added coordinates. + + """ + + from scipy.interpolate import interp1d, interpn + + logging.debug("start adding coordinates from cube") + + # pull time as datetime object and timestr from input data and add it to DataFrame: + t["time"] = None + t["timestr"] = None + + logging.debug("adding time coordinate") + + time_in = variable_cube.coord("time") + time_in_datetime = time_in.units.num2date(time_in.points) + + t["time"] = time_in_datetime[t["frame"]] + t["timestr"] = [ + x.strftime("%Y-%m-%d %H:%M:%S") for x in time_in_datetime[t["frame"]] + ] + + # Get list of all coordinates in input cube except for time (already treated): + coord_names = [coord.name() for coord in variable_cube.coords()] + coord_names.remove("time") + + logging.debug("time coordinate added") + + # chose right dimension for horizontal axis based on time dimension: + ndim_time = variable_cube.coord_dims("time")[0] + if ndim_time == 0: + hdim_1 = 1 + hdim_2 = 2 + elif ndim_time == 1: + hdim_1 = 0 + hdim_2 = 2 + elif ndim_time == 2: + hdim_1 = 0 + hdim_2 = 1 + + # create vectors to use to interpolate from pixels to coordinates + dimvec_1 = np.arange(variable_cube.shape[hdim_1]) + dimvec_2 = np.arange(variable_cube.shape[hdim_2]) + + # loop over coordinates in input data: + for coord in coord_names: + logging.debug("adding coord: %s", coord) + # interpolate 2D coordinates: + if variable_cube.coord(coord).ndim == 1: + if variable_cube.coord_dims(coord) == (hdim_1,): + f = interp1d( + dimvec_1, + variable_cube.coord(coord).points, + fill_value="extrapolate", + ) + coordinate_points = f(t["hdim_1"]) + + if variable_cube.coord_dims(coord) == (hdim_2,): + f = interp1d( + dimvec_2, + variable_cube.coord(coord).points, + fill_value="extrapolate", + ) + coordinate_points = f(t["hdim_2"]) + + # interpolate 2D coordinates: + elif variable_cube.coord(coord).ndim == 2: + if variable_cube.coord_dims(coord) == (hdim_1, hdim_2): + points = (dimvec_1, dimvec_2) + values = variable_cube.coord(coord).points + xi = np.column_stack((t["hdim_1"], t["hdim_2"])) + coordinate_points = interpn(points, values, xi) + + if variable_cube.coord_dims(coord) == (hdim_2, hdim_1): + points = (dimvec_2, dimvec_1) + values = variable_cube.coord(coord).points + xi = np.column_stack((t["hdim_2"], t["hdim_1"])) + coordinate_points = interpn(points, values, xi) + + # interpolate 3D coordinates: + # mainly workaround for wrf latitude and longitude (to be fixed in future) + + elif variable_cube.coord(coord).ndim == 3: + if variable_cube.coord_dims(coord) == (ndim_time, hdim_1, hdim_2): + points = (dimvec_1, dimvec_2) + values = variable_cube[0, :, :].coord(coord).points + xi = np.column_stack((t["hdim_1"], t["hdim_2"])) + coordinate_points = interpn(points, values, xi) + + if variable_cube.coord_dims(coord) == (ndim_time, hdim_2, hdim_1): + points 
= (dimvec_2, dimvec_1) + values = variable_cube[0, :, :].coord(coord).points + xi = np.column_stack((t["hdim_2"], t["hdim_1"])) + coordinate_points = interpn(points, values, xi) + + if variable_cube.coord_dims(coord) == (hdim_1, ndim_time, hdim_2): + points = (dimvec_1, dimvec_2) + values = variable_cube[:, 0, :].coord(coord).points + xi = np.column_stack((t["hdim_1"], t["hdim_2"])) + coordinate_points = interpn(points, values, xi) + + if variable_cube.coord_dims(coord) == (hdim_1, hdim_2, ndim_time): + points = (dimvec_1, dimvec_2) + values = variable_cube[:, :, 0].coord(coord).points + xi = np.column_stack((t["hdim_1"], t["hdim_2"])) + coordinate_points = interpn(points, values, xi) + + if variable_cube.coord_dims(coord) == (hdim_2, ndim_time, hdim_1): + points = (dimvec_2, dimvec_1) + values = variable_cube[:, 0, :].coord(coord).points + xi = np.column_stack((t["hdim_2"], t["hdim_1"])) + coordinate_points = interpn(points, values, xi) + + if variable_cube.coord_dims(coord) == (hdim_2, hdim_1, ndim_time): + points = (dimvec_2, dimvec_1) + values = variable_cube[:, :, 0].coord(coord).points + xi = np.column_stack((t["hdim_2"], t["hdim_1"])) + coordinate_points = interpn(points, values, xi) + + # write resulting array or list into DataFrame: + t[coord] = coordinate_points + + logging.debug("added coord: " + coord) + return t + + +def add_coordinates_3D( + t: pd.DataFrame, + variable_cube: iris.cube.Cube, + vertical_coord: Union[str, int] = None, + vertical_axis: Union[int, None] = None, + assume_coords_fixed_in_time=True, +): + """Function adding coordinates from the tracking cube to the trajectories + for the 3D case: time, longitude&latitude, x&y dimensions, and altitude + + Parameters + ---------- + t: pandas DataFrame + trajectories/features + variable_cube: iris.cube.Cube + Cube (usually the one you are tracking on) at least conaining the dimension of 'time'. + Typically, 'longitude','latitude','x_projection_coordinate','y_projection_coordinate', + and 'altitude' (if 3D) are the coordinates that we expect, although this function + will happily interpolate along any dimension coordinates you give. + vertical_coord: str or int + Name or axis number of the vertical coordinate. If None, tries to auto-detect. + If it is a string, it looks for the coordinate or the dimension name corresponding + to the string. If it is an int, it assumes that it is the vertical axis. + Note that if you only have a 2D or 3D coordinate for altitude, you must + pass in an int. + vertical_axis: int or None + Axis number of the vertical. + assume_coords_fixed_in_time: bool + If true, it assumes that the coordinates are fixed in time, even if the + coordinates say they vary in time. This is, by default, True, to preserve + legacy functionality. If False, it assumes that if a coordinate says + it varies in time, it takes the coordinate at its word. 
+ + Returns + ------- + pandas DataFrame + trajectories with added coordinates + """ + from scipy.interpolate import interp2d, interp1d, interpn + + logging.debug("start adding coordinates from cube") + + # pull time as datetime object and timestr from input data and add it to DataFrame: + t["time"] = None + t["timestr"] = None + + logging.debug("adding time coordinate") + + time_in = variable_cube.coord("time") + time_in_datetime = time_in.units.num2date(time_in.points) + + t["time"] = time_in_datetime[t["frame"]] + t["timestr"] = [ + x.strftime("%Y-%m-%d %H:%M:%S") for x in time_in_datetime[t["frame"]] + ] + + # Get list of all coordinates in input cube except for time (already treated): + coord_names = [coord.name() for coord in variable_cube.coords()] + coord_names.remove("time") + + logging.debug("time coordinate added") + + # chose right dimension for horizontal and vertical axes based on time dimension: + ndim_time = variable_cube.coord_dims("time")[0] + + if type(vertical_coord) is int: + ndim_vertical = vertical_coord + vertical_axis = None + else: + vertical_axis = tb_utils_gi.find_vertical_axis_from_coord( + variable_cube, vertical_coord=vertical_coord + ) + + if vertical_axis is not None: + ndim_vertical = tb_utils_gi.find_axis_from_coord(variable_cube, vertical_axis) + if ndim_vertical is None: + raise ValueError("Vertical Coordinate not found") + + # We need to figure out the axis number of hdim_1 and hdim_2. + ndim_hdim_1, ndim_hdim_2 = tb_utils_gi.find_hdim_axes_3D( + variable_cube, vertical_axis=ndim_vertical + ) + + if ndim_hdim_1 is None or ndim_hdim_2 is None: + raise ValueError("Could not find hdim coordinates.") + + # create vectors to use to interpolate from pixels to coordinates + dimvec_1 = np.arange(variable_cube.shape[ndim_vertical]) + dimvec_2 = np.arange(variable_cube.shape[ndim_hdim_1]) + dimvec_3 = np.arange(variable_cube.shape[ndim_hdim_2]) + dimvec_time = np.arange(variable_cube.shape[ndim_time]) + + coord_to_ax = { + ndim_vertical: (dimvec_1, "vdim"), + ndim_time: (dimvec_time, "time"), + ndim_hdim_1: (dimvec_2, "hdim_1"), + ndim_hdim_2: (dimvec_3, "hdim_2"), + } + + # loop over coordinates in input data: + for coord in coord_names: + logging.debug("adding coord: " + coord) + # interpolate 1D coordinates: + var_coord = variable_cube.coord(coord) + if var_coord.ndim == 1: + curr_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] + f = interp1d(curr_dim[0], var_coord.points, fill_value="extrapolate") + coordinate_points = f(t[curr_dim[1]]) + + # interpolate 2D coordinates + elif var_coord.ndim == 2: + first_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]] + second_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] + points = (second_dim[0], first_dim[0]) + values = var_coord.points + xi = np.column_stack((t[second_dim[1]], t[first_dim[1]])) + coordinate_points = interpn(points, values, xi) + + # Deal with the special case where the coordinate is 3D but + # one of the dimensions is time and we assume the coordinates + # don't vary in time. 
+ elif ( + var_coord.ndim == 3 + and ndim_time in variable_cube.coord_dims(coord) + and assume_coords_fixed_in_time + ): + time_pos = variable_cube.coord_dims(coord).index(ndim_time) + hdim1_pos = 0 if time_pos != 0 else 1 + hdim2_pos = 1 if time_pos == 2 else 2 + first_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim2_pos]] + second_dim = coord_to_ax[variable_cube.coord_dims(coord)[hdim1_pos]] + points = (second_dim[0], first_dim[0]) + values = var_coord.points + xi = np.column_stack((t[second_dim[1]], t[first_dim[1]])) + coordinate_points = interpn(points, values, xi) + + # interpolate 3D coordinates: + elif var_coord.ndim == 3: + first_dim = coord_to_ax[variable_cube.coord_dims(coord)[0]] + second_dim = coord_to_ax[variable_cube.coord_dims(coord)[1]] + third_dim = coord_to_ax[variable_cube.coord_dims(coord)[2]] + coordinate_points = interpn( + [first_dim[0], second_dim[0], third_dim[0]], + var_coord.points, + [ + [a, b, c] + for a, b, c in zip( + t[first_dim[1]], t[second_dim[1]], t[third_dim[1]] + ) + ], + ) + # coordinate_points=[f(a,b) for a,b in zip(t[first_dim[1]],t[second_dim[1]])] + + # write resulting array or list into DataFrame: + t[coord] = coordinate_points + + logging.debug("added coord: " + coord) + return t diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 1396619f..672978e2 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -219,9 +219,6 @@ def find_hdim_axes_3d( vertical_coord_name = find_vertical_coord_name( field_in, vertical_coord=vertical_coord ) - except ValueError: - vert_coord_found = False - else: vert_coord_found = True ndim_vertical = find_axis_from_dim_coord(field_in, vertical_coord_name) if ndim_vertical is None: @@ -231,6 +228,9 @@ def find_hdim_axes_3d( ) vertical_coord_axis = ndim_vertical + except ValueError: + vert_coord_found = False + if not vert_coord_found: # if we don't have a vertical coordinate, and we are 3D or lower # that is okay. 
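For reference, a minimal, illustrative sketch (not part of any patch in this series) of how the dispatch introduced above is exercised on the iris pathway. It assumes the tobac.utils.internal.iris_utils module path created above and the tobac.testing.generate_single_feature helper that the tests later in this series rely on:

import datetime
import numpy as np
import xarray as xr
import tobac.testing as tbtest
import tobac.utils.internal.iris_utils as iris_utils

# Build a small 2D (time, x, y) field with one dimensional coordinate.
coords = {
    "time": np.array([datetime.datetime(2000, 1, 1, 0)]),
    "x": ("x", np.linspace(0, 10, 10)),
}
da = xr.DataArray(np.empty((1, 10, 10)), dims=("time", "x", "y"), coords=coords)

# A single feature at pixel (0, 0), using the same helper call as the tests below.
features = tbtest.generate_single_feature(0, 0, feature_num=1, max_h1=10, max_h2=10)

# Iris pathway: interpolate the cube's coordinates onto the feature positions.
features_with_coords = iris_utils.add_coordinates(features, da.to_iris())

# At this point in the series the generic add_coordinates wrapper still raises
# NotImplementedError for xarray.DataArray input; the xarray pathway
# (add_coordinates_to_features) is added in a later patch.
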
From 61329e41d1a247051efca579262b098e58db4dd3 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 21 Sep 2023 10:30:35 -0700 Subject: [PATCH 10/32] linting for xarray_utils --- tobac/tests/test_xarray_utils.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tobac/tests/test_xarray_utils.py b/tobac/tests/test_xarray_utils.py index 643058f6..6a9f1dbf 100644 --- a/tobac/tests/test_xarray_utils.py +++ b/tobac/tests/test_xarray_utils.py @@ -1,17 +1,14 @@ """Tests for tobac.utils.internal_utils.xarray_utils """ - -import tobac.utils.internal as internal_utils -import tobac.utils.internal.xarray_utils as xr_utils -import tobac.testing as tbtest - from typing import Union import pytest import numpy as np import xarray as xr +import tobac.utils.internal.xarray_utils as xr_utils + @pytest.mark.parametrize( "dim_names, coord_dim_map, coord_looking_for, expected_out, expected_raise", From c5667883d60f63e63ea8847911742d98f6d1219a Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 29 Sep 2023 15:53:21 -0500 Subject: [PATCH 11/32] adding new test function to test iris and xarray pathways --- tobac/tests/test_iris_xarray_match_utils.py | 90 +++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 tobac/tests/test_iris_xarray_match_utils.py diff --git a/tobac/tests/test_iris_xarray_match_utils.py b/tobac/tests/test_iris_xarray_match_utils.py new file mode 100644 index 00000000..8c1d5b85 --- /dev/null +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -0,0 +1,90 @@ +"""Tests to confirm that xarray and iris pathways work the same and produce the same data + for the same input datasets. +""" +import datetime + +import numpy as np +import pandas as pd +import xarray as xr +import pytest + + +import tobac.testing as tbtest +import tobac.utils.internal.iris_utils as iris_utils +import tobac.utils.internal.xarray_utils as xr_utils + + +@pytest.mark.parametrize( + "feature_positions, coordinates, expected_val", + [(((0, 0, 0), (9, 9, 9)), {"x": ("x", np.linspace(0, 9, 10))}, {"x": (0, 9)})], +) +def test_add_coordinates_2D( + feature_positions: tuple[tuple[float]], + coordinates: dict[str : tuple[str, np.ndarray]], + expected_val: dict[str : tuple[float]], +): + """ + Test that add_coordinates_2D for xarray and iris are equal. + + Parameters + ---------- + feature_positions: tuple of tuple of floats + Locations of the features to test in (hdim_1, hdim_2, zdim [optional]) coordinates + coordinates: dict, key: str; value: tuple of str, numpy array + Coordinates to use, in xarray coordinate style. Dims will be ('x', 'y', 'z') for 3D + data (determined by feature_positions) and ('x', 'y') for 2D data. All axes will have + size 10. 
+ expected_val: dict, key: str; value: tuple of floats + Expected interpolated coordinates + + """ + + all_indiv_feats = list() + if len(feature_positions[0]) == 2: + is_3D = False + elif len(feature_positions[0]) == 3: + is_3D = True + else: + raise ValueError("Feature positions should be 2 or 3D") + for i, single_feat_position in enumerate(feature_positions): + if not is_3D and len(single_feat_position) == 2: + all_indiv_feats.append( + tbtest.generate_single_feature( + single_feat_position[0], + single_feat_position[1], + feature_num=i, + max_h1=10, + max_h2=10, + ) + ) + elif is_3D and len(single_feat_position) == 3: + all_indiv_feats.append( + tbtest.generate_single_feature( + single_feat_position[0], + single_feat_position[1], + start_v=single_feat_position[2], + feature_num=i, + max_h1=10, + max_h2=10, + ) + ) + + else: + raise ValueError("Feature positions should be 2 or 3D") + + all_feats = pd.concat(all_indiv_feats) + + da_size = (1, 10, 10, 10) if is_3D else (1, 10, 10) + dims = ("time", "x", "y", "z") if is_3D else ("time", "x", "y") + coordinates["time"] = np.array((datetime.datetime(2000, 1, 1, 0),)) + da_with_coords = xr.DataArray( + data=np.empty(da_size), dims=dims, coords=coordinates + ) + if is_3D: + iris_coord_interp = iris_utils.add_coordinates_3D( + all_feats, da_with_coords.to_iris() + ) + else: + iris_coord_interp = iris_utils.add_coordinates( + all_feats, da_with_coords.to_iris() + ) From ae6cf80aeca5f008cbe916bfb167c9b9aed80440 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 4 Oct 2023 16:22:20 -0500 Subject: [PATCH 12/32] working xarray interpolation now --- tobac/tests/test_iris_xarray_match_utils.py | 74 ++++++++++++---- tobac/utils/internal/xarray_utils.py | 94 ++++++++++++++++++++- 2 files changed, 149 insertions(+), 19 deletions(-) diff --git a/tobac/tests/test_iris_xarray_match_utils.py b/tobac/tests/test_iris_xarray_match_utils.py index 8c1d5b85..9680d675 100644 --- a/tobac/tests/test_iris_xarray_match_utils.py +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -16,15 +16,43 @@ @pytest.mark.parametrize( "feature_positions, coordinates, expected_val", - [(((0, 0, 0), (9, 9, 9)), {"x": ("x", np.linspace(0, 9, 10))}, {"x": (0, 9)})], + [ + ( + ((0, 0, 0), (9, 9, 9)), + {"x": ("x", np.linspace(0, 10, 10)), "z": ("z", np.linspace(0, 10, 10))}, + {"x": (0, 9)}, + ), + ( + ((0, 0), (9, 9)), + {"x": ("x", np.linspace(0, 10, 10))}, + {"x": (0, 9)}, + ), + ( + ((0, 0), (9, 9), (5, 7)), + { + "longitude": ("x", np.linspace(-30, 60, 10)), + "latitude": ("y", np.linspace(-70, 20, 10)), + }, + {"latitude": (-70, 20, 0), "longitude": (-30, 60, 20)}, + ), + ( + ((0, 0), (9, 9), (5, 7)), + { + "longitude": ("x", np.linspace(-30, 60, 10)), + "latitude": ("y", np.linspace(-70, 20, 10)), + }, + {"latitude": (-70, 20, 0), "longitude": (-30, 60, 20)}, + ), + ], ) -def test_add_coordinates_2D( +def test_add_coordinates_xarray_base( feature_positions: tuple[tuple[float]], coordinates: dict[str : tuple[str, np.ndarray]], expected_val: dict[str : tuple[float]], ): """ - Test that add_coordinates_2D for xarray and iris are equal. + Test that adding coordinates for xarray and iris are equal, using an + xarray generated dataset as the base. 
Parameters ---------- @@ -39,7 +67,7 @@ def test_add_coordinates_2D( """ - all_indiv_feats = list() + all_indiv_feats = [] if len(feature_positions[0]) == 2: is_3D = False elif len(feature_positions[0]) == 3: @@ -72,19 +100,29 @@ def test_add_coordinates_2D( else: raise ValueError("Feature positions should be 2 or 3D") - all_feats = pd.concat(all_indiv_feats) + all_feats = pd.concat(all_indiv_feats) - da_size = (1, 10, 10, 10) if is_3D else (1, 10, 10) - dims = ("time", "x", "y", "z") if is_3D else ("time", "x", "y") - coordinates["time"] = np.array((datetime.datetime(2000, 1, 1, 0),)) - da_with_coords = xr.DataArray( - data=np.empty(da_size), dims=dims, coords=coordinates + da_size = (1, 10, 10, 10) if is_3D else (1, 10, 10) + dims = ("time", "x", "y", "z") if is_3D else ("time", "x", "y") + coordinates["time"] = np.array((datetime.datetime(2000, 1, 1, 0),)) + da_with_coords = xr.DataArray(data=np.empty(da_size), dims=dims, coords=coordinates) + if is_3D: + iris_coord_interp = iris_utils.add_coordinates_3D( + all_feats, da_with_coords.to_iris() ) - if is_3D: - iris_coord_interp = iris_utils.add_coordinates_3D( - all_feats, da_with_coords.to_iris() - ) - else: - iris_coord_interp = iris_utils.add_coordinates( - all_feats, da_with_coords.to_iris() - ) + xr_coord_interp = xr_utils.add_coordinates_to_features( + all_feats, da_with_coords + ) + + else: + iris_coord_interp = iris_utils.add_coordinates( + all_feats, da_with_coords.to_iris() + ) + xr_coord_interp = xr_utils.add_coordinates_to_features( + all_feats, da_with_coords + ) + for val_name in expected_val: + assert (iris_coord_interp[val_name] == expected_val[val_name]).all() + assert (xr_coord_interp[val_name] == expected_val[val_name]).all() + + pd.testing.assert_frame_equal(iris_coord_interp, xr_coord_interp) diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 672978e2..cd878af4 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -5,6 +5,7 @@ from typing import Union import numpy as np +import pandas as pd import xarray as xr from . import general_internal as tb_utils_gi @@ -182,7 +183,7 @@ def find_hdim_axes_3d( vertical_coord: Union[str, None] = None, vertical_axis: Union[int, None] = None, time_dim_coord_name: str = "time", -) -> tuple[int]: +) -> tuple[int, int]: """Finds what the hdim axes are given a 3D (including z) or 4D (including z and time) dataset. @@ -246,3 +247,94 @@ def find_hdim_axes_3d( all_axes[np.logical_not(np.isin(all_axes, [time_axis, vertical_coord_axis]))] ) return output_vals + + +def add_coordinates_to_features( + feature_df: pd.DataFrame, + variable_da: xr.DataArray, + vertical_coord: Union[str, None] = None, + vertical_axis: Union[int, None] = None, + assume_coords_fixed_in_time: bool = True, +) -> pd.DataFrame: + """Function to populate the interpolated coordinates to feature + + Parameters + ---------- + feature_df: pandas DataFrame + Feature dataframe + variable_da: xarray.DataArray + DataArray (usually the one you are tracking on) at least conaining the dimension of 'time'. + Typically, 'longitude','latitude','x_projection_coordinate','y_projection_coordinate', + and 'altitude' (if 3D) are the coordinates that we expect, although this function + will happily interpolate along any coordinates you give. + vertical_coord: str + Name of the vertical coordinate. If None, tries to auto-detect. + If it is a string, it looks for the coordinate or the dimension name corresponding + to the string. 
If it is an int, it assumes that it is the vertical axis. + Note that if you only have a 2D or 3D coordinate for altitude, you must + pass in an int. + vertical_axis: int or None + Axis number of the vertical. + assume_coords_fixed_in_time: bool + If true, it assumes that the coordinates are fixed in time, even if the + coordinates say they vary in time. This is, by default, True, to preserve + legacy functionality. If False, it assumes that if a coordinate says + it varies in time, it takes the coordinate at its word. + Returns + ------- + + """ + + time_dim_name: str = "time" + # first, we must find the names of the dimensions corresponding to the numbered + # dimensions. + + ndims: int = variable_da.ndim + + time_dim_number = find_axis_from_dim(variable_da, time_dim_name) + + is_3d = (time_dim_number is not None and ndims == 4) or ( + time_dim_number is None and ndims == 3 + ) + if is_3d: + hdim1_axis, hdim2_axis = find_hdim_axes_3d( + variable_da, + vertical_coord, + vertical_axis, + time_dim_coord_name=time_dim_name, + ) + if vertical_axis is None: + vdim_coord = find_vertical_coord_name(variable_da, vertical_coord) + else: + vdim_coord = variable_da.dims[vertical_axis] + else: # 2D + if ndims == 2: + hdim1_axis = 0 + hdim2_axis = 1 + elif ndims == 3 and time_dim_number is not None: + possible_dims = [0, 1, 2] + possible_dims.pop(time_dim_number) + hdim1_axis, hdim2_axis = possible_dims + else: + raise ValueError("DataArray has too many or too few dimensions") + + hdim1_name = variable_da.dims[hdim1_axis] + hdim2_name = variable_da.dims[hdim2_axis] + + dim_interp_coords = { + hdim1_name: xr.DataArray(feature_df["hdim_1"].values, dims="features"), + hdim2_name: xr.DataArray(feature_df["hdim_2"].values, dims="features"), + } + + if is_3d: + dim_interp_coords[vdim_coord] = xr.DataArray( + feature_df["vdim"].values, dims="features" + ) + + interpolated_df = variable_da.interp(coords=dim_interp_coords) + + for interp_coord in interpolated_df.coords: + if interp_coord == time_dim_name: + continue + feature_df[interp_coord] = interpolated_df[interp_coord].values + return feature_df From 3dbf4971bec083760f1ea631ea73b7e418257991 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 4 Oct 2023 21:40:27 -0500 Subject: [PATCH 13/32] adding 2D array interpolation --- tobac/tests/test_iris_xarray_match_utils.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/tobac/tests/test_iris_xarray_match_utils.py b/tobac/tests/test_iris_xarray_match_utils.py index 9680d675..b064f609 100644 --- a/tobac/tests/test_iris_xarray_match_utils.py +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -36,12 +36,18 @@ {"latitude": (-70, 20, 0), "longitude": (-30, 60, 20)}, ), ( - ((0, 0), (9, 9), (5, 7)), + ((0, 0), (9, 9), (5, 7), (3.6, 7.9)), { - "longitude": ("x", np.linspace(-30, 60, 10)), - "latitude": ("y", np.linspace(-70, 20, 10)), + "longitude": ( + ("x", "y"), + np.arange(-180, -80).reshape(10, -1), + ), + "latitude": (("x", "y"), np.arange(-50, 50).reshape(10, -1)), + }, + { + "latitude": (-50, 49, 7, -6.1), + "longitude": (-180, -81, -123, -136.1), }, - {"latitude": (-70, 20, 0), "longitude": (-30, 60, 20)}, ), ], ) From 755b4eabbc88a5ce61cd30854af7fab07aba4d57 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 4 Oct 2023 21:45:52 -0500 Subject: [PATCH 14/32] switch to asserting for almost equal --- tobac/tests/test_iris_xarray_match_utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tobac/tests/test_iris_xarray_match_utils.py 
b/tobac/tests/test_iris_xarray_match_utils.py index b064f609..578c5e9e 100644 --- a/tobac/tests/test_iris_xarray_match_utils.py +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -128,7 +128,14 @@ def test_add_coordinates_xarray_base( all_feats, da_with_coords ) for val_name in expected_val: - assert (iris_coord_interp[val_name] == expected_val[val_name]).all() - assert (xr_coord_interp[val_name] == expected_val[val_name]).all() + np.testing.assert_almost_equal( + iris_coord_interp[val_name], expected_val[val_name] + ) + np.testing.assert_almost_equal( + xr_coord_interp[val_name], expected_val[val_name] + ) + + # assert (iris_coord_interp[val_name] == expected_val[val_name]).all() + # assert (xr_coord_interp[val_name] == expected_val[val_name]).all() pd.testing.assert_frame_equal(iris_coord_interp, xr_coord_interp) From 3170496fa3957d322d2a2e23b124bd349f6b3054 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 5 Oct 2023 10:50:49 -0500 Subject: [PATCH 15/32] incorporating julia's statistics code --- tobac/feature_detection.py | 4 ++-- tobac/utils/general.py | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 23aeb548..2b6f4408 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -901,7 +901,7 @@ def feature_detection_multithreshold_timestep( dxy: float = -1, wavelength_filtering: tuple[float] = None, strict_thresholding: bool = False, - statistics: Union[dict, None] = None + statistics: Union[dict, None] = None, ) -> pd.DataFrame: """Find features in each timestep. @@ -1141,7 +1141,7 @@ def feature_detection_multithreshold( wavelength_filtering: tuple = None, dz: Union[float, None] = None, strict_thresholding: bool = False, - statistics: Union[dict, None] = None + statistics: Union[dict, None] = None, ) -> pd.DataFrame: """Perform feature detection based on contiguous regions. 
diff --git a/tobac/utils/general.py b/tobac/utils/general.py index 1b52c068..1676358d 100644 --- a/tobac/utils/general.py +++ b/tobac/utils/general.py @@ -457,7 +457,6 @@ def get_statistics( # mask must contain positive values to calculate statistics if labels[labels > 0].size > 0: - if index is None: index = range( int(np.nanmin(labels[labels > 0])), int(np.nanmax(labels) + 1) @@ -529,7 +528,6 @@ def get_statistics( # add results of computed statistics to feature dataframe with column name given per func_dict for idx, label in enumerate(np.unique(labels[labels > 0])): - # test if values are scalars if not hasattr(stats[idx], "__len__"): # if yes, we can just assign the value to the new column and row of the respective feature From 8dfc1dd106bdada5d2d36543c51825136e256261 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Fri, 6 Oct 2023 21:50:12 -0500 Subject: [PATCH 16/32] adding times to add_coordinates --- tobac/feature_detection.py | 6 +++--- tobac/utils/general.py | 10 ++++++++-- tobac/utils/internal/xarray_utils.py | 6 ++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 490efcb2..6e46eec7 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -1377,15 +1377,15 @@ def feature_detection_multithreshold( # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) if "vdim" in features: features = add_coordinates_3D( - features, field_in.to_iris(), vertical_coord=vertical_coord + features, field_in, vertical_coord=vertical_coord ) else: - features = add_coordinates(features, field_in.to_iris()) + features = add_coordinates(features, field_in) else: features = None logging.debug("No features detected") logging.debug("feature detection completed") - return features.to_xarray() + return features def filter_min_distance( diff --git a/tobac/utils/general.py b/tobac/utils/general.py index 91152bb7..06d1a264 100644 --- a/tobac/utils/general.py +++ b/tobac/utils/general.py @@ -42,7 +42,7 @@ def add_coordinates( if isinstance(variable_cube, iris.cube.Cube): return internal_utils.iris_utils.add_coordinates(t, variable_cube) if isinstance(variable_cube, xr.DataArray): - raise NotImplementedError("add_coordinates not implemented for xarray.") + return internal_utils.xr_utils.add_coordinates_to_features(t, variable_cube) raise ValueError( "add_coordinates only supports xarray.DataArray and iris.cube.Cube" ) @@ -91,7 +91,13 @@ def add_coordinates_3D( t, variable_cube, vertical_coord, vertical_axis, assume_coords_fixed_in_time ) if isinstance(variable_cube, xr.DataArray): - raise NotImplementedError("add_coordinates_3D not implemented for xarray.") + return internal_utils.xr_utils.add_coordinates_to_features( + t, + variable_cube, + vertical_coord=vertical_coord, + vertical_axis=vertical_axis, + assume_coords_fixed_in_time=assume_coords_fixed_in_time, + ) raise ValueError( "add_coordinates_3D only supports xarray.DataArray and iris.cube.Cube" ) diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index cd878af4..d7101ed0 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -8,6 +8,7 @@ import pandas as pd import xarray as xr from . 
import general_internal as tb_utils_gi +import datetime def find_axis_from_dim_coord( @@ -332,6 +333,11 @@ def add_coordinates_to_features( ) interpolated_df = variable_da.interp(coords=dim_interp_coords) + feature_df[time_dim_name] = variable_da[time_dim_name].values[feature_df["frame"]] + feature_df[time_dim_name + "str"] = [ + pd.to_datetime(str(x)).strftime("%Y-%m-%d %H:%M:%S") + for x in variable_da[time_dim_name].values[feature_df["frame"]] + ] for interp_coord in interpolated_df.coords: if interp_coord == time_dim_name: From 034982b7a540da9a91cad70b3a607fcedfe196d5 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 9 Nov 2023 16:03:14 -0600 Subject: [PATCH 17/32] fixes for updates from upstream --- .github/workflows/check_formatting.yml | 2 +- tobac/utils/internal/xarray_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/check_formatting.yml b/.github/workflows/check_formatting.yml index 0d89ad93..b19a31fd 100644 --- a/.github/workflows/check_formatting.yml +++ b/.github/workflows/check_formatting.yml @@ -19,4 +19,4 @@ jobs: shell: bash -l {0} run: mamba install --quiet --yes --file requirements.txt black && - black tobac --check + black tobac --check --diff diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index d7101ed0..7db648bc 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -7,7 +7,7 @@ import numpy as np import pandas as pd import xarray as xr -from . import general_internal as tb_utils_gi +from . import basic as tb_utils_gi import datetime From 0a5f9b71fcd107785d6f74842d303a8b6bd5b207 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 9 Nov 2023 16:04:23 -0600 Subject: [PATCH 18/32] fix literal import in feature detection --- tobac/feature_detection.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 7e340f2e..88f84254 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -36,7 +36,6 @@ from tobac.utils.general import spectral_filtering from tobac.utils import get_statistics import warnings -from typing import Union, Literal # from typing_extensions import Literal import iris From 561ad32e50e7f04bb3a40180a3b7ee30e1b5cacb Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 15 Nov 2023 08:32:04 -0600 Subject: [PATCH 19/32] add new tests --- tobac/tests/test_iris_xarray_match_utils.py | 52 +++++++++++++++++++++ tobac/tests/test_xarray_utils.py | 2 +- tobac/utils/internal/xarray_utils.py | 51 ++++++++++++++------ 3 files changed, 89 insertions(+), 16 deletions(-) diff --git a/tobac/tests/test_iris_xarray_match_utils.py b/tobac/tests/test_iris_xarray_match_utils.py index 578c5e9e..f2927277 100644 --- a/tobac/tests/test_iris_xarray_match_utils.py +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -1,6 +1,7 @@ """Tests to confirm that xarray and iris pathways work the same and produce the same data for the same input datasets. 
""" +import copy import datetime import numpy as np @@ -139,3 +140,54 @@ def test_add_coordinates_xarray_base( # assert (xr_coord_interp[val_name] == expected_val[val_name]).all() pd.testing.assert_frame_equal(iris_coord_interp, xr_coord_interp) + + +@pytest.mark.parametrize( + "coordinate_names, coordinate_standard_names", + [(("lat",), ("latitude",))], +) +def test_add_coordinates_xarray_std_names( + coordinate_names: tuple[str], + coordinate_standard_names: tuple[str], +): + """ + Test that adding coordinates for xarray and iris result in the same coordinate names + when standard_names are added to the xarray coordinates + + Parameters + ---------- + coordinate_names: tuple of str + names of coordinates to give + coordinate_standard_name: tuple of str + standard_names of coordinates to give + + """ + + all_feats = tbtest.generate_single_feature( + 0, + 0, + feature_num=1, + max_h1=10, + max_h2=10, + ) + + da_size = (1, 10, 10) + dims = ("time", "x", "y") + coordinates = dict() + coordinates["time"] = np.array((datetime.datetime(2000, 1, 1, 0),)) + + for coord_name, coord_standard_name in zip( + coordinate_names, coordinate_standard_names + ): + coordinates[coord_name] = xr.DataArray(data=np.arange(10), dims="x") + coordinates[coord_name].attrs["standard_name"] = coord_standard_name + + da_with_coords = xr.DataArray(data=np.empty(da_size), dims=dims, coords=coordinates) + + iris_coord_interp = iris_utils.add_coordinates( + copy.deepcopy(all_feats), da_with_coords.to_iris() + ) + xr_coord_interp = xr_utils.add_coordinates_to_features( + copy.deepcopy(all_feats), da_with_coords + ) + pd.testing.assert_frame_equal(iris_coord_interp, xr_coord_interp) diff --git a/tobac/tests/test_xarray_utils.py b/tobac/tests/test_xarray_utils.py index 6a9f1dbf..210a1a90 100644 --- a/tobac/tests/test_xarray_utils.py +++ b/tobac/tests/test_xarray_utils.py @@ -98,7 +98,7 @@ ) def test_find_axis_from_dim_coord( dim_names: tuple[str], - coord_dim_map: dict[str : tuple[str],], + coord_dim_map: dict, coord_looking_for: str, expected_out: Union[int, None], expected_raise: bool, diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 7db648bc..7aec4d7a 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -2,7 +2,7 @@ """ from __future__ import annotations - +import copy from typing import Union import numpy as np import pandas as pd @@ -255,7 +255,8 @@ def add_coordinates_to_features( variable_da: xr.DataArray, vertical_coord: Union[str, None] = None, vertical_axis: Union[int, None] = None, - assume_coords_fixed_in_time: bool = True, + use_standard_names: bool = True, + interp_dims_without_coords: bool = False, ) -> pd.DataFrame: """Function to populate the interpolated coordinates to feature @@ -276,15 +277,19 @@ def add_coordinates_to_features( pass in an int. vertical_axis: int or None Axis number of the vertical. - assume_coords_fixed_in_time: bool - If true, it assumes that the coordinates are fixed in time, even if the - coordinates say they vary in time. This is, by default, True, to preserve - legacy functionality. If False, it assumes that if a coordinate says - it varies in time, it takes the coordinate at its word. + use_standard_names: bool + If true, when interpolating a coordinate, it looks for a standard_name + and uses that to name the output coordinate, to mimic iris functionality. + If false, uses the actual name of the coordinate to output. 
+ interp_dims_without_coords: bool + If True, interpolates dimensions without coordinates + If False, skips dimensions without coordinates Returns ------- """ + # make a copy to avoid editing in place. + return_feat_df = copy.deepcopy(feature_df) time_dim_name: str = "time" # first, we must find the names of the dimensions corresponding to the numbered @@ -297,6 +302,7 @@ def add_coordinates_to_features( is_3d = (time_dim_number is not None and ndims == 4) or ( time_dim_number is None and ndims == 3 ) + vdim_coord = None if is_3d: hdim1_axis, hdim2_axis = find_hdim_axes_3d( variable_da, @@ -323,24 +329,39 @@ def add_coordinates_to_features( hdim2_name = variable_da.dims[hdim2_axis] dim_interp_coords = { - hdim1_name: xr.DataArray(feature_df["hdim_1"].values, dims="features"), - hdim2_name: xr.DataArray(feature_df["hdim_2"].values, dims="features"), + hdim1_name: xr.DataArray(return_feat_df["hdim_1"].values, dims="features"), + hdim2_name: xr.DataArray(return_feat_df["hdim_2"].values, dims="features"), } if is_3d: dim_interp_coords[vdim_coord] = xr.DataArray( - feature_df["vdim"].values, dims="features" + return_feat_df["vdim"].values, dims="features" ) interpolated_df = variable_da.interp(coords=dim_interp_coords) - feature_df[time_dim_name] = variable_da[time_dim_name].values[feature_df["frame"]] - feature_df[time_dim_name + "str"] = [ + return_feat_df[time_dim_name] = variable_da[time_dim_name].values[ + return_feat_df["frame"] + ] + return_feat_df[time_dim_name + "str"] = [ pd.to_datetime(str(x)).strftime("%Y-%m-%d %H:%M:%S") - for x in variable_da[time_dim_name].values[feature_df["frame"]] + for x in variable_da[time_dim_name].values[return_feat_df["frame"]] ] for interp_coord in interpolated_df.coords: + # skip time coordinate because we dealt with that already if interp_coord == time_dim_name: continue - feature_df[interp_coord] = interpolated_df[interp_coord].values - return feature_df + + if interp_coord not in variable_da.coords and not interp_dims_without_coords: + continue + + interp_coord_name = interp_coord + # if we have standard names and are using them, rename our coordinates. + if use_standard_names: + try: + interp_coord_name = interpolated_df[interp_coord].attrs["standard_name"] + except KeyError: + pass + + return_feat_df[interp_coord_name] = interpolated_df[interp_coord].values + return return_feat_df From 87c5603b7d513ea0b898e0a3d4c39c2872394183 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 21 Nov 2023 09:40:42 -0600 Subject: [PATCH 20/32] add RNG for vertical axis --- tobac/utils/internal/xarray_utils.py | 57 ++++++++++++++++++++++++---- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 7aec4d7a..5d06bc26 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -9,6 +9,7 @@ import xarray as xr from . import basic as tb_utils_gi import datetime +import random, string def find_axis_from_dim_coord( @@ -184,7 +185,8 @@ def find_hdim_axes_3d( vertical_coord: Union[str, None] = None, vertical_axis: Union[int, None] = None, time_dim_coord_name: str = "time", -) -> tuple[int, int]: + return_vertical_axis: bool = False, +) -> Union[tuple[int, int], tuple[int, int, int]]: """Finds what the hdim axes are given a 3D (including z) or 4D (including z and time) dataset. @@ -200,6 +202,8 @@ def find_hdim_axes_3d( that only one of vertical_axis or vertical_coord can be set. 
time_dim_coord_name: str Name of the time dimension/coordinate + return_vertical_axis: bool + True if you want to also return the vertical axis number as the last value Returns ------- @@ -247,6 +251,8 @@ def find_hdim_axes_3d( output_vals = tuple( all_axes[np.logical_not(np.isin(all_axes, [time_axis, vertical_coord_axis]))] ) + if return_vertical_axis: + output_vals = (*output_vals, vertical_coord_axis) return output_vals @@ -304,14 +310,16 @@ def add_coordinates_to_features( ) vdim_coord = None if is_3d: - hdim1_axis, hdim2_axis = find_hdim_axes_3d( + hdim1_axis, hdim2_axis, vertical_axis = find_hdim_axes_3d( variable_da, vertical_coord, vertical_axis, time_dim_coord_name=time_dim_name, + return_vertical_axis=True, ) if vertical_axis is None: vdim_coord = find_vertical_coord_name(variable_da, vertical_coord) + vertical_axis = find_axis_from_dim_coord(variable_da, vdim_coord) else: vdim_coord = variable_da.dims[vertical_axis] else: # 2D @@ -325,8 +333,31 @@ def add_coordinates_to_features( else: raise ValueError("DataArray has too many or too few dimensions") - hdim1_name = variable_da.dims[hdim1_axis] - hdim2_name = variable_da.dims[hdim2_axis] + # create new coordinates that are simply the i, j, k values + + # generate random names for the new coordinates that are based on i, j, k values + hdim1_name = "".join( + random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) + for _ in range(16) + ) + hdim2_name = "".join( + random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) + for _ in range(16) + ) + + new_coords = { + hdim1_name: ( + variable_da.dims[hdim1_axis], + np.arange(0, variable_da.shape[hdim1_axis], 1), + ), + hdim2_name: ( + variable_da.dims[hdim2_axis], + np.arange(0, variable_da.shape[hdim2_axis], 1), + ), + } + + # hdim1_name = variable_da.dims[hdim1_axis] + # hdim2_name = variable_da.dims[hdim2_axis] dim_interp_coords = { hdim1_name: xr.DataArray(return_feat_df["hdim_1"].values, dims="features"), @@ -334,11 +365,23 @@ def add_coordinates_to_features( } if is_3d: - dim_interp_coords[vdim_coord] = xr.DataArray( + vdim_name = "".join( + random.choice( + string.ascii_uppercase + string.ascii_lowercase + string.digits + ) + for _ in range(16) + ) + dim_interp_coords[vdim_name] = xr.DataArray( return_feat_df["vdim"].values, dims="features" ) - - interpolated_df = variable_da.interp(coords=dim_interp_coords) + new_coords[vdim_name] = ( + ( + variable_da.dims[vertical_axis], + np.arange(0, variable_da.shape[vertical_axis], 1), + ), + ) + variable_da_new_coords = variable_da.assign_coords(coords=new_coords) + interpolated_df = variable_da_new_coords.interp(coords=dim_interp_coords) return_feat_df[time_dim_name] = variable_da[time_dim_name].values[ return_feat_df["frame"] ] From 6d120388b5cb1293fd5968804dd46d38ebcadd80 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Sat, 2 Dec 2023 15:23:40 -0600 Subject: [PATCH 21/32] Updated coordinate interpolation for xarray, things now match. 
--- tobac/tests/test_iris_xarray_match_utils.py | 4 +- tobac/utils/general.py | 1 - tobac/utils/internal/xarray_utils.py | 71 +++++++++++++-------- 3 files changed, 45 insertions(+), 31 deletions(-) diff --git a/tobac/tests/test_iris_xarray_match_utils.py b/tobac/tests/test_iris_xarray_match_utils.py index f2927277..23026da8 100644 --- a/tobac/tests/test_iris_xarray_match_utils.py +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -21,12 +21,12 @@ ( ((0, 0, 0), (9, 9, 9)), {"x": ("x", np.linspace(0, 10, 10)), "z": ("z", np.linspace(0, 10, 10))}, - {"x": (0, 9)}, + {"x": (0, 10)}, ), ( ((0, 0), (9, 9)), {"x": ("x", np.linspace(0, 10, 10))}, - {"x": (0, 9)}, + {"x": (0, 10)}, ), ( ((0, 0), (9, 9), (5, 7)), diff --git a/tobac/utils/general.py b/tobac/utils/general.py index 46d84dfa..69dc9743 100644 --- a/tobac/utils/general.py +++ b/tobac/utils/general.py @@ -96,7 +96,6 @@ def add_coordinates_3D( variable_cube, vertical_coord=vertical_coord, vertical_axis=vertical_axis, - assume_coords_fixed_in_time=assume_coords_fixed_in_time, ) raise ValueError( "add_coordinates_3D only supports xarray.DataArray and iris.cube.Cube" diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 5d06bc26..88e71c88 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -4,6 +4,8 @@ import copy from typing import Union + +import cftime import numpy as np import pandas as pd import xarray as xr @@ -263,6 +265,7 @@ def add_coordinates_to_features( vertical_axis: Union[int, None] = None, use_standard_names: bool = True, interp_dims_without_coords: bool = False, + preserve_iris_datetime_types: bool = True, ) -> pd.DataFrame: """Function to populate the interpolated coordinates to feature @@ -290,8 +293,12 @@ def add_coordinates_to_features( interp_dims_without_coords: bool If True, interpolates dimensions without coordinates If False, skips dimensions without coordinates + preserve_iris_datetime_types: bool + If True, uses the same datetime types as iris (cftime) + If False, converts datetime output to pandas standard Returns ------- + Dataframe with coordinates added """ # make a copy to avoid editing in place. @@ -333,55 +340,55 @@ def add_coordinates_to_features( else: raise ValueError("DataArray has too many or too few dimensions") - # create new coordinates that are simply the i, j, k values + # If the dimensions share a name with the coordinates and those coordinates do not match + # with the i, j, k-style indices, you cannot `interp` along those i, j, k indices. + # so, instead, we rename the dimensions to random strings so that we can + # run interpolation. 
+ + hdim1_name_original = variable_da.dims[hdim1_axis] + hdim2_name_original = variable_da.dims[hdim2_axis] # generate random names for the new coordinates that are based on i, j, k values - hdim1_name = "".join( + hdim1_name_new = "".join( random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(16) ) - hdim2_name = "".join( + hdim2_name_new = "".join( random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) for _ in range(16) ) - new_coords = { - hdim1_name: ( - variable_da.dims[hdim1_axis], - np.arange(0, variable_da.shape[hdim1_axis], 1), - ), - hdim2_name: ( - variable_da.dims[hdim2_axis], - np.arange(0, variable_da.shape[hdim2_axis], 1), - ), + dim_new_names = { + hdim1_name_original: hdim1_name_new, + hdim2_name_original: hdim2_name_new, } - - # hdim1_name = variable_da.dims[hdim1_axis] - # hdim2_name = variable_da.dims[hdim2_axis] - dim_interp_coords = { - hdim1_name: xr.DataArray(return_feat_df["hdim_1"].values, dims="features"), - hdim2_name: xr.DataArray(return_feat_df["hdim_2"].values, dims="features"), + hdim1_name_new: xr.DataArray(return_feat_df["hdim_1"].values, dims="features"), + hdim2_name_new: xr.DataArray(return_feat_df["hdim_2"].values, dims="features"), } if is_3d: - vdim_name = "".join( + vdim_name_original = variable_da.dims[vertical_axis] + vdim_name_new = "".join( random.choice( string.ascii_uppercase + string.ascii_lowercase + string.digits ) for _ in range(16) ) - dim_interp_coords[vdim_name] = xr.DataArray( + dim_interp_coords[vdim_name_new] = xr.DataArray( return_feat_df["vdim"].values, dims="features" ) - new_coords[vdim_name] = ( - ( - variable_da.dims[vertical_axis], - np.arange(0, variable_da.shape[vertical_axis], 1), - ), - ) - variable_da_new_coords = variable_da.assign_coords(coords=new_coords) - interpolated_df = variable_da_new_coords.interp(coords=dim_interp_coords) + + dim_new_names[vdim_name_original] = vdim_name_new + + # you can only rename dims alone when operating on datasets, so add our dataarray to a + # dataset + renamed_dim_ds = xr.Dataset({"var": variable_da}).rename_dims(dim_new_names) + + interpolated_df = renamed_dim_ds["var"].interp(coords=dim_interp_coords) + interpolated_df = interpolated_df.drop([hdim1_name_new, hdim2_name_new]) + if is_3d: + interpolated_df = interpolated_df.drop([vdim_name_new]) return_feat_df[time_dim_name] = variable_da[time_dim_name].values[ return_feat_df["frame"] ] @@ -407,4 +414,12 @@ def add_coordinates_to_features( pass return_feat_df[interp_coord_name] = interpolated_df[interp_coord].values + if preserve_iris_datetime_types: + import cftime + + return_feat_df[time_dim_name] = return_feat_df[time_dim_name].apply( + lambda x: cftime.datetime( + x.year, x.month, x.day, x.hour, x.minute, x.second, x.microsecond + ) + ) return return_feat_df From 752d285b1f18ecafb3ef95290a834f8b0f926973 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Mon, 4 Dec 2023 13:29:59 -0600 Subject: [PATCH 22/32] Added preservation of iris datatypes based on whether or not iris data was passed --- tobac/feature_detection.py | 16 +- tobac/tests/test_convert.py | 14 +- tobac/utils/bulk_statistics.py | 2 +- tobac/utils/decorators.py | 595 +++++++++++++++++---------------- tobac/utils/general.py | 24 +- tobac/utils/internal/basic.py | 2 +- 6 files changed, 351 insertions(+), 302 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index fb26e82d..041686bb 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -888,7 +888,7 @@ def 
feature_detection_threshold( return features_threshold, regions -@internal_utils.irispandas_to_xarray +@internal_utils.irispandas_to_xarray() def feature_detection_multithreshold_timestep( data_i: xr.DataArray, i_time: int, @@ -1130,7 +1130,7 @@ def feature_detection_multithreshold_timestep( return features_thresholds -@internal_utils.irispandas_to_xarray +@internal_utils.irispandas_to_xarray(save_iris_info=True) def feature_detection_multithreshold( field_in: xr.DataArray, dxy: float = None, @@ -1153,6 +1153,7 @@ def feature_detection_multithreshold( dz: Union[float, None] = None, strict_thresholding: bool = False, statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None, + **kwargs, ) -> pd.DataFrame: """Perform feature detection based on contiguous regions. @@ -1401,10 +1402,17 @@ def feature_detection_multithreshold( # features_filtered.drop(columns=['idx','num','threshold_value'],inplace=True) if "vdim" in features: features = add_coordinates_3D( - features, field_in, vertical_coord=vertical_coord + features, + field_in, + vertical_coord=vertical_coord, + preserve_iris_datetime_types=kwargs["converted_from_iris"], ) else: - features = add_coordinates(features, field_in) + features = add_coordinates( + features, + field_in, + preserve_iris_datetime_types=kwargs["converted_from_iris"], + ) else: features = None logging.debug("No features detected") diff --git a/tobac/tests/test_convert.py b/tobac/tests/test_convert.py index d5b6bbe8..ca2228fd 100644 --- a/tobac/tests/test_convert.py +++ b/tobac/tests/test_convert.py @@ -174,8 +174,9 @@ def test_function_kwarg(test_input, kwarg=None): def test_function_tuple_output(test_input, kwarg=None): return (test_input, test_input) - decorated_function_kwarg = decorator(test_function_kwarg) - decorated_function_tuple = decorator(test_function_tuple_output) + decorator_i = decorator() + decorated_function_kwarg = decorator_i(test_function_kwarg) + decorated_function_tuple = decorator_i(test_function_tuple_output) if input_types[0] == xarray.DataArray: data = xarray.DataArray.from_iris(tobac.testing.make_simple_sample_data_2D()) @@ -227,7 +228,8 @@ def test_xarray_workflow(): data_xarray = xarray.DataArray.from_iris(deepcopy(data)) # Testing the get_spacings utility - get_spacings_xarray = xarray_to_iris(tobac.utils.get_spacings) + xarray_to_iris_i = xarray_to_iris() + get_spacings_xarray = xarray_to_iris_i(tobac.utils.get_spacings) dxy, dt = tobac.utils.get_spacings(data) dxy_xarray, dt_xarray = get_spacings_xarray(data_xarray) @@ -235,7 +237,7 @@ def test_xarray_workflow(): assert dt == dt_xarray # Testing feature detection - feature_detection_xarray = xarray_to_iris( + feature_detection_xarray = xarray_to_iris_i( tobac.feature_detection.feature_detection_multithreshold ) features = tobac.feature_detection.feature_detection_multithreshold( @@ -246,7 +248,7 @@ def test_xarray_workflow(): assert_frame_equal(features, features_xarray) # Testing the segmentation - segmentation_xarray = xarray_to_iris(tobac.segmentation.segmentation) + segmentation_xarray = xarray_to_iris_i(tobac.segmentation.segmentation) mask, features = tobac.segmentation.segmentation(features, data, dxy, threshold=1.0) mask_xarray, features_xarray = segmentation_xarray( features_xarray, data_xarray, dxy_xarray, threshold=1.0 @@ -255,7 +257,7 @@ def test_xarray_workflow(): assert (mask.data == mask_xarray.to_iris().data).all() # testing tracking - tracking_xarray = xarray_to_iris(tobac.tracking.linking_trackpy) + tracking_xarray = 
xarray_to_iris_i(tobac.tracking.linking_trackpy) track = tobac.tracking.linking_trackpy(features, data, dt, dxy, v_max=100.0) track_xarray = tracking_xarray( features_xarray, data_xarray, dt_xarray, dxy_xarray, v_max=100.0 diff --git a/tobac/utils/bulk_statistics.py b/tobac/utils/bulk_statistics.py index f27f9fd5..55fe17fc 100644 --- a/tobac/utils/bulk_statistics.py +++ b/tobac/utils/bulk_statistics.py @@ -147,7 +147,7 @@ def get_statistics( return features -@decorators.iris_to_xarray +@decorators.iris_to_xarray() def get_statistics_from_mask( features: pd.DataFrame, segmentation_mask: xr.DataArray, diff --git a/tobac/utils/decorators.py b/tobac/utils/decorators.py index 33b012f7..bd9315d2 100644 --- a/tobac/utils/decorators.py +++ b/tobac/utils/decorators.py @@ -5,344 +5,369 @@ import warnings -def iris_to_xarray(func): - """Decorator that converts all input of a function that is in the form of - Iris cubes into xarray DataArrays and converts all outputs with type - xarray DataArrays back into Iris cubes. - - Parameters - ---------- - func : function - Function to be decorated - - Returns - ------- - wrapper : function - Function including decorator - """ - - import iris - import xarray - - @functools.wraps(func) - def wrapper(*args, **kwargs): - # print(kwargs) - if any([type(arg) == iris.cube.Cube for arg in args]) or any( - [type(arg) == iris.cube.Cube for arg in kwargs.values()] - ): - # print("converting iris to xarray and back") - args = tuple( - [ - xarray.DataArray.from_iris(arg) - if type(arg) == iris.cube.Cube - else arg - for arg in args - ] - ) - kwargs_new = dict( - zip( - kwargs.keys(), +def iris_to_xarray(): + def iris_to_xarray_i(func): + """Decorator that converts all input of a function that is in the form of + Iris cubes into xarray DataArrays and converts all outputs with type + xarray DataArrays back into Iris cubes. 
+ + Parameters + ---------- + func : function + Function to be decorated + + Returns + ------- + wrapper : function + Function including decorator + """ + + import iris + import xarray + + @functools.wraps(func) + def wrapper(*args, **kwargs): + # print(kwargs) + if any([type(arg) == iris.cube.Cube for arg in args]) or any( + [type(arg) == iris.cube.Cube for arg in kwargs.values()] + ): + # print("converting iris to xarray and back") + args = tuple( [ xarray.DataArray.from_iris(arg) if type(arg) == iris.cube.Cube else arg - for arg in kwargs.values() - ], - ) - ) - # print(args) - # print(kwargs) - output = func(*args, **kwargs_new) - if type(output) == tuple: - output = tuple( - [ - xarray.DataArray.to_iris(output_item) - if type(output_item) == xarray.DataArray - else output_item - for output_item in output + for arg in args ] ) - elif type(output) == xarray.DataArray: - output = xarray.DataArray.to_iris(output) - # if output is neither tuple nor an xr.DataArray + kwargs_new = dict( + zip( + kwargs.keys(), + [ + xarray.DataArray.from_iris(arg) + if type(arg) == iris.cube.Cube + else arg + for arg in kwargs.values() + ], + ) + ) + # print(args) + # print(kwargs) + output = func(*args, **kwargs_new) + if type(output) == tuple: + output = tuple( + [ + xarray.DataArray.to_iris(output_item) + if type(output_item) == xarray.DataArray + else output_item + for output_item in output + ] + ) + elif type(output) == xarray.DataArray: + output = xarray.DataArray.to_iris(output) + # if output is neither tuple nor an xr.DataArray + else: + output = func(*args, **kwargs) + else: output = func(*args, **kwargs) + return output - else: - output = func(*args, **kwargs) - return output - - return wrapper + return wrapper + return iris_to_xarray_i -def xarray_to_iris(func): - """Decorator that converts all input of a function that is in the form of - xarray DataArrays into Iris cubes and converts all outputs with type - Iris cubes back into xarray DataArrays. - Parameters - ---------- - func : function - Function to be decorated. +def xarray_to_iris(): + def xarray_to_iris_i(func): + """Decorator that converts all input of a function that is in the form of + xarray DataArrays into Iris cubes and converts all outputs with type + Iris cubes back into xarray DataArrays. - Returns - ------- - wrapper : function - Function including decorator. + Parameters + ---------- + func : function + Function to be decorated. - Examples - -------- - >>> segmentation_xarray = xarray_to_iris(segmentation) + Returns + ------- + wrapper : function + Function including decorator. 
- This line creates a new function that can process xarray fields and - also outputs fields in xarray format, but otherwise works just like - the original function: + Examples + -------- + >>> segmentation_xarray = xarray_to_iris(segmentation) - >>> mask_xarray, features = segmentation_xarray( - features, data_xarray, dxy, threshold - ) - """ + This line creates a new function that can process xarray fields and + also outputs fields in xarray format, but otherwise works just like + the original function: - import iris - import xarray - - @functools.wraps(func) - def wrapper(*args, **kwargs): - # print(args) - # print(kwargs) - if any([type(arg) == xarray.DataArray for arg in args]) or any( - [type(arg) == xarray.DataArray for arg in kwargs.values()] - ): - # print("converting xarray to iris and back") - args = tuple( - [ - xarray.DataArray.to_iris(arg) - if type(arg) == xarray.DataArray - else arg - for arg in args - ] + >>> mask_xarray, features = segmentation_xarray( + features, data_xarray, dxy, threshold ) - if kwargs: - kwargs_new = dict( - zip( - kwargs.keys(), - [ - xarray.DataArray.to_iris(arg) - if type(arg) == xarray.DataArray - else arg - for arg in kwargs.values() - ], - ) - ) - else: - kwargs_new = kwargs + """ + + import iris + import xarray + + @functools.wraps(func) + def wrapper(*args, **kwargs): # print(args) # print(kwargs) - output = func(*args, **kwargs_new) - if type(output) == tuple: - output = tuple( + if any([type(arg) == xarray.DataArray for arg in args]) or any( + [type(arg) == xarray.DataArray for arg in kwargs.values()] + ): + # print("converting xarray to iris and back") + args = tuple( [ - xarray.DataArray.from_iris(output_item) - if type(output_item) == iris.cube.Cube - else output_item - for output_item in output + xarray.DataArray.to_iris(arg) + if type(arg) == xarray.DataArray + else arg + for arg in args ] ) - else: - if type(output) == iris.cube.Cube: - output = xarray.DataArray.from_iris(output) - - else: - output = func(*args, **kwargs) - # print(output) - return output - - return wrapper - - -def irispandas_to_xarray(func): - """Decorator that converts all input of a function that is in the form of - Iris cubes/pandas Dataframes into xarray DataArrays/xarray Datasets and - converts all outputs with the type xarray DataArray/xarray Dataset - back into Iris cubes/pandas Dataframes. + if kwargs: + kwargs_new = dict( + zip( + kwargs.keys(), + [ + xarray.DataArray.to_iris(arg) + if type(arg) == xarray.DataArray + else arg + for arg in kwargs.values() + ], + ) + ) + else: + kwargs_new = kwargs + # print(args) + # print(kwargs) + output = func(*args, **kwargs_new) + if type(output) == tuple: + output = tuple( + [ + xarray.DataArray.from_iris(output_item) + if type(output_item) == iris.cube.Cube + else output_item + for output_item in output + ] + ) + else: + if type(output) == iris.cube.Cube: + output = xarray.DataArray.from_iris(output) - Parameters - ---------- - func : function - Function to be decorated. + else: + output = func(*args, **kwargs) + # print(output) + return output + + return wrapper + + return xarray_to_iris_i + + +def irispandas_to_xarray(save_iris_info: bool = False): + def irispandas_to_xarray_i(func): + """Decorator that converts all input of a function that is in the form of + Iris cubes/pandas Dataframes into xarray DataArrays/xarray Datasets and + converts all outputs with the type xarray DataArray/xarray Dataset + back into Iris cubes/pandas Dataframes. + + Parameters + ---------- + func : function + Function to be decorated. 
+ + Returns + ------- + wrapper : function + Function including decorator. + """ + import iris + import iris.cube + import xarray + import pandas as pd + + @functools.wraps(func) + def wrapper(*args, **kwargs): + # pass if we did an iris conversion. + if save_iris_info: + if any([(type(arg) == iris.cube.Cube) for arg in args]) or any( + [(type(arg) == iris.cube.Cube) for arg in kwargs.values()] + ): + kwargs["converted_from_iris"] = True + else: + kwargs["converted_from_iris"] = False - Returns - ------- - wrapper : function - Function including decorator. - """ - import iris - import xarray - import pandas as pd - - @functools.wraps(func) - def wrapper(*args, **kwargs): - # print(kwargs) - if any( - [(type(arg) == iris.cube.Cube or type(arg) == pd.DataFrame) for arg in args] - ) or any( - [ - (type(arg) == iris.cube.Cube or type(arg) == pd.DataFrame) - for arg in kwargs.values() - ] - ): - # print("converting iris to xarray and back") - args = tuple( + # print(kwargs) + if any( [ - xarray.DataArray.from_iris(arg) - if type(arg) == iris.cube.Cube - else arg.to_xarray() - if type(arg) == pd.DataFrame - else arg + (type(arg) == iris.cube.Cube or type(arg) == pd.DataFrame) for arg in args ] - ) - kwargs = dict( - zip( - kwargs.keys(), + ) or any( + [ + (type(arg) == iris.cube.Cube or type(arg) == pd.DataFrame) + for arg in kwargs.values() + ] + ): + # print("converting iris to xarray and back") + args = tuple( [ xarray.DataArray.from_iris(arg) if type(arg) == iris.cube.Cube else arg.to_xarray() if type(arg) == pd.DataFrame else arg - for arg in kwargs.values() - ], - ) - ) - - output = func(*args, **kwargs) - if type(output) == tuple: - output = tuple( - [ - xarray.DataArray.to_iris(output_item) - if type(output_item) == xarray.DataArray - else output_item.to_dataframe() - if type(output_item) == xarray.Dataset - else output_item - for output_item in output + for arg in args ] ) - else: - if type(output) == xarray.DataArray: - output = xarray.DataArray.to_iris(output) - elif type(output) == xarray.Dataset: - output = output.to_dataframe() - - else: - output = func(*args, **kwargs) - return output - - return wrapper - - -def xarray_to_irispandas(func): - """Decorator that converts all input of a function that is in the form of - DataArrays/xarray Datasets into xarray Iris cubes/pandas Dataframes and - converts all outputs with the type Iris cubes/pandas Dataframes back into - xarray DataArray/xarray Dataset. - - Parameters - ---------- - func : function - Function to be decorated. - - Returns - ------- - wrapper : function - Function including decorator. 
- - Examples - -------- - >>> linking_trackpy_xarray = xarray_to_irispandas( - linking_trackpy - ) - - This line creates a new function that can process xarray inputs and - also outputs in xarray formats, but otherwise works just like the - original function: - - >>> track_xarray = linking_trackpy_xarray( - features_xarray, field_xarray, dt, dx - ) - """ - import iris - import xarray - import pandas as pd - - @functools.wraps(func) - def wrapper(*args, **kwargs): - # print(args) - # print(kwargs) - if any( - [ - (type(arg) == xarray.DataArray or type(arg) == xarray.Dataset) - for arg in args - ] - ) or any( - [ - (type(arg) == xarray.DataArray or type(arg) == xarray.Dataset) - for arg in kwargs.values() - ] - ): - # print("converting xarray to iris and back") - args = tuple( - [ - xarray.DataArray.to_iris(arg) - if type(arg) == xarray.DataArray - else arg.to_dataframe() - if type(arg) == xarray.Dataset - else arg - for arg in args - ] - ) - if kwargs: - kwargs_new = dict( + kwargs = dict( zip( kwargs.keys(), [ - xarray.DataArray.to_iris(arg) - if type(arg) == xarray.DataArray - else arg.to_dataframe() - if type(arg) == xarray.Dataset + xarray.DataArray.from_iris(arg) + if type(arg) == iris.cube.Cube + else arg.to_xarray() + if type(arg) == pd.DataFrame else arg for arg in kwargs.values() ], ) ) + + output = func(*args, **kwargs) + if type(output) == tuple: + output = tuple( + [ + xarray.DataArray.to_iris(output_item) + if type(output_item) == xarray.DataArray + else output_item.to_dataframe() + if type(output_item) == xarray.Dataset + else output_item + for output_item in output + ] + ) + else: + if type(output) == xarray.DataArray: + output = xarray.DataArray.to_iris(output) + elif type(output) == xarray.Dataset: + output = output.to_dataframe() + else: - kwargs_new = kwargs + output = func(*args, **kwargs) + return output + + return wrapper + + return irispandas_to_xarray_i + + +def xarray_to_irispandas(): + def xarray_to_irispandas_i(func): + """Decorator that converts all input of a function that is in the form of + DataArrays/xarray Datasets into xarray Iris cubes/pandas Dataframes and + converts all outputs with the type Iris cubes/pandas Dataframes back into + xarray DataArray/xarray Dataset. + + Parameters + ---------- + func : function + Function to be decorated. + + Returns + ------- + wrapper : function + Function including decorator. 
+ + Examples + -------- + >>> linking_trackpy_xarray = xarray_to_irispandas( + linking_trackpy + ) + + This line creates a new function that can process xarray inputs and + also outputs in xarray formats, but otherwise works just like the + original function: + + >>> track_xarray = linking_trackpy_xarray( + features_xarray, field_xarray, dt, dx + ) + """ + import iris + import xarray + import pandas as pd + + @functools.wraps(func) + def wrapper(*args, **kwargs): # print(args) # print(kwargs) - output = func(*args, **kwargs_new) - if type(output) == tuple: - output = tuple( + if any( + [ + (type(arg) == xarray.DataArray or type(arg) == xarray.Dataset) + for arg in args + ] + ) or any( + [ + (type(arg) == xarray.DataArray or type(arg) == xarray.Dataset) + for arg in kwargs.values() + ] + ): + # print("converting xarray to iris and back") + args = tuple( [ - xarray.DataArray.from_iris(output_item) - if type(output_item) == iris.cube.Cube - else output_item.to_xarray() - if type(output_item) == pd.DataFrame - else output_item - for output_item in output + xarray.DataArray.to_iris(arg) + if type(arg) == xarray.DataArray + else arg.to_dataframe() + if type(arg) == xarray.Dataset + else arg + for arg in args ] ) + if kwargs: + kwargs_new = dict( + zip( + kwargs.keys(), + [ + xarray.DataArray.to_iris(arg) + if type(arg) == xarray.DataArray + else arg.to_dataframe() + if type(arg) == xarray.Dataset + else arg + for arg in kwargs.values() + ], + ) + ) + else: + kwargs_new = kwargs + # print(args) + # print(kwargs) + output = func(*args, **kwargs_new) + if type(output) == tuple: + output = tuple( + [ + xarray.DataArray.from_iris(output_item) + if type(output_item) == iris.cube.Cube + else output_item.to_xarray() + if type(output_item) == pd.DataFrame + else output_item + for output_item in output + ] + ) + else: + if type(output) == iris.cube.Cube: + output = xarray.DataArray.from_iris(output) + elif type(output) == pd.DataFrame: + output = output.to_xarray() + else: - if type(output) == iris.cube.Cube: - output = xarray.DataArray.from_iris(output) - elif type(output) == pd.DataFrame: - output = output.to_xarray() + output = func(*args, **kwargs) + # print(output) + return output - else: - output = func(*args, **kwargs) - # print(output) - return output + return wrapper - return wrapper + return xarray_to_irispandas_i def njit_if_available(func, **kwargs): diff --git a/tobac/utils/general.py b/tobac/utils/general.py index 69dc9743..1f40cc0c 100644 --- a/tobac/utils/general.py +++ b/tobac/utils/general.py @@ -18,7 +18,9 @@ def add_coordinates( - t: pd.DataFrame, variable_cube: Union[xr.DataArray, iris.cube.Cube] + t: pd.DataFrame, + variable_cube: Union[xr.DataArray, iris.cube.Cube], + preserve_iris_datetime_types: bool = True, ) -> pd.DataFrame: """Add coordinates from the input cube of the feature detection to the trajectories/features. @@ -32,6 +34,10 @@ def add_coordinates( Input data used for the tracking with coordinate information to transfer to the resulting DataFrame. Needs to contain the coordinate 'time'. 
+ preserve_iris_datetime_types: bool + If True, uses the same datetime types as iris (cftime) + If False, converts datetime output to pandas standard + Used for xarray inputs only Returns ------- @@ -42,7 +48,9 @@ def add_coordinates( if isinstance(variable_cube, iris.cube.Cube): return internal_utils.iris_utils.add_coordinates(t, variable_cube) if isinstance(variable_cube, xr.DataArray): - return internal_utils.xr_utils.add_coordinates_to_features(t, variable_cube) + return internal_utils.xr_utils.add_coordinates_to_features( + t, variable_cube, preserve_iris_datetime_types=preserve_iris_datetime_types + ) raise ValueError( "add_coordinates only supports xarray.DataArray and iris.cube.Cube" ) @@ -54,14 +62,15 @@ def add_coordinates_3D( vertical_coord: Union[str, int] = None, vertical_axis: Union[int, None] = None, assume_coords_fixed_in_time: bool = True, + preserve_iris_datetime_types: bool = True, ): """Function adding coordinates from the tracking cube to the trajectories for the 3D case: time, longitude&latitude, x&y dimensions, and altitude Parameters ---------- - t: pandas DataFrame - trajectories/features + t: pandas DataFrame + Input features variable_cube: iris.cube.Cube Cube (usually the one you are tracking on) at least conaining the dimension of 'time'. Typically, 'longitude','latitude','x_projection_coordinate','y_projection_coordinate', @@ -80,6 +89,10 @@ def add_coordinates_3D( coordinates say they vary in time. This is, by default, True, to preserve legacy functionality. If False, it assumes that if a coordinate says it varies in time, it takes the coordinate at its word. + preserve_iris_datetime_types: bool + If True, uses the same datetime types as iris (cftime) + If False, converts datetime output to pandas standard + Used for xarray inputs only Returns ------- @@ -96,6 +109,7 @@ def add_coordinates_3D( variable_cube, vertical_coord=vertical_coord, vertical_axis=vertical_axis, + preserve_iris_datetime_types=preserve_iris_datetime_types, ) raise ValueError( "add_coordinates_3D only supports xarray.DataArray and iris.cube.Cube" @@ -419,7 +433,7 @@ def combine_feature_dataframes( return combined_sorted -@internal_utils.irispandas_to_xarray +@internal_utils.irispandas_to_xarray() def transform_feature_points( features, new_dataset, diff --git a/tobac/utils/internal/basic.py b/tobac/utils/internal/basic.py index 11b58856..1ba787ea 100644 --- a/tobac/utils/internal/basic.py +++ b/tobac/utils/internal/basic.py @@ -285,7 +285,7 @@ def find_axis_from_coord( raise ValueError("variable_arr must be Iris Cube or Xarray DataArray") -@irispandas_to_xarray +@irispandas_to_xarray() def detect_latlon_coord_name( in_dataset: Union[xr.DataArray, iris.cube.Cube], latitude_name: Union[str, None] = None, From d66c27b14e9ea53102e0a9888901aa5819039b00 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 5 Dec 2023 09:34:19 -0600 Subject: [PATCH 23/32] switched to swap_dims and skipped dataset conversion --- tobac/utils/internal/xarray_utils.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 88e71c88..035297cb 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -383,9 +383,8 @@ def add_coordinates_to_features( # you can only rename dims alone when operating on datasets, so add our dataarray to a # dataset - renamed_dim_ds = xr.Dataset({"var": variable_da}).rename_dims(dim_new_names) - - interpolated_df = 
renamed_dim_ds["var"].interp(coords=dim_interp_coords) + renamed_dim_da = variable_da.swap_dims(dim_new_names) + interpolated_df = renamed_dim_da.interp(coords=dim_interp_coords) interpolated_df = interpolated_df.drop([hdim1_name_new, hdim2_name_new]) if is_3d: interpolated_df = interpolated_df.drop([vdim_name_new]) From ca5631e97d3e9fb076ccb2e438e287b19d0f34e7 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 7 Feb 2024 11:31:14 -0600 Subject: [PATCH 24/32] fix bug with numpy typing --- tobac/utils/bulk_statistics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tobac/utils/bulk_statistics.py b/tobac/utils/bulk_statistics.py index 55fe17fc..e31c3806 100644 --- a/tobac/utils/bulk_statistics.py +++ b/tobac/utils/bulk_statistics.py @@ -16,7 +16,7 @@ def get_statistics( features: pd.DataFrame, - labels: np.ndarray[int], + labels: np.ndarray, *fields: tuple[np.ndarray], statistic: dict[str, Union[Callable, tuple[Callable, dict]]] = { "ncells": np.count_nonzero From 8679d4447e61e98c94f32ab4f37b7448206391d4 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 22 Feb 2024 21:36:53 -0600 Subject: [PATCH 25/32] updates from @w-k-jones review --- tobac/feature_detection.py | 11 +++-------- tobac/segmentation.py | 2 +- tobac/tests/test_xarray_utils.py | 2 +- tobac/utils/general.py | 4 +--- tobac/utils/internal/basic.py | 2 +- tobac/utils/internal/iris_utils.py | 2 +- tobac/utils/internal/xarray_utils.py | 12 +++++------- 7 files changed, 13 insertions(+), 22 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 041686bb..24d4600d 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -1246,11 +1246,6 @@ def feature_detection_multithreshold( logging.debug("start feature detection based on thresholds") ndim_time = internal_utils.find_axis_from_coord(field_in, time_var_name) - if ndim_time is None: - raise ValueError( - "input to feature detection step must include a dimension named " - + time_var_name - ) # Check whether we need to run 2D or 3D feature detection if field_in.ndim == 3: @@ -1280,7 +1275,7 @@ def feature_detection_multithreshold( if vertical_axis is None: # We need to determine vertical axis. # first, find the name of the vertical axis - vertical_axis_name = internal_utils.find_vertical_axis_from_coord( + vertical_axis_name = internal_utils.find_vertical_coord_name( field_in, vertical_coord=vertical_coord ) # then find our axis number. @@ -1341,8 +1336,8 @@ def feature_detection_multithreshold( "given in meter." 
) - for i_time, data_i in enumerate(field_in.transpose(time_var_name, ...)): - time_i = data_i[time_var_name].values + for i_time, time_i in enumerate(field_in.coords[time_var_name]): + data_i = field_in.isel({time_var_name: i_time}) features_thresholds = feature_detection_multithreshold_timestep( data_i, diff --git a/tobac/segmentation.py b/tobac/segmentation.py index b59d7975..00f0b21c 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -457,7 +457,7 @@ def segmentation_timestep( hdim_1_axis = 0 hdim_2_axis = 1 elif field_in.ndim == 3: - vertical_axis = internal_utils.find_vertical_axis_from_coord( + vertical_axis = internal_utils.find_vertical_coord_name( field_in, vertical_coord=vertical_coord ) ndim_vertical = field_in.coord_dims(vertical_axis) diff --git a/tobac/tests/test_xarray_utils.py b/tobac/tests/test_xarray_utils.py index 210a1a90..a73ca31c 100644 --- a/tobac/tests/test_xarray_utils.py +++ b/tobac/tests/test_xarray_utils.py @@ -56,7 +56,7 @@ }, "latitude", # coord_looking_for None, - False, + True, ), ( ("time", "altitude", "x", "y"), # dim_names diff --git a/tobac/utils/general.py b/tobac/utils/general.py index 1f40cc0c..be827014 100644 --- a/tobac/utils/general.py +++ b/tobac/utils/general.py @@ -487,9 +487,7 @@ def transform_feature_points( RADIUS_EARTH_M = 6371000 is_3D = "vdim" in features if is_3D: - vert_coord = internal_utils.find_vertical_axis_from_coord( - new_dataset, altitude_name - ) + vert_coord = internal_utils.find_vertical_coord_name(new_dataset, altitude_name) lat_coord, lon_coord = internal_utils.detect_latlon_coord_name( new_dataset, latitude_name=latitude_name, longitude_name=longitude_name diff --git a/tobac/utils/internal/basic.py b/tobac/utils/internal/basic.py index 1ba787ea..810b2977 100644 --- a/tobac/utils/internal/basic.py +++ b/tobac/utils/internal/basic.py @@ -110,7 +110,7 @@ def get_indices_of_labels_from_reg_prop_dict(region_property_dict: dict) -> tupl return [curr_loc_indices, y_indices, x_indices] -def find_vertical_axis_from_coord( +def find_vertical_coord_name( variable_cube: Union[iris.cube.Cube, xr.DataArray], vertical_coord: Union[str, None] = None, ) -> str: diff --git a/tobac/utils/internal/iris_utils.py b/tobac/utils/internal/iris_utils.py index 3e3c301f..f5210404 100644 --- a/tobac/utils/internal/iris_utils.py +++ b/tobac/utils/internal/iris_utils.py @@ -370,7 +370,7 @@ def add_coordinates_3D( ndim_vertical = vertical_coord vertical_axis = None else: - vertical_axis = tb_utils_gi.find_vertical_axis_from_coord( + vertical_axis = tb_utils_gi.find_vertical_coord_name( variable_cube, vertical_coord=vertical_coord ) diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 035297cb..e7a649a1 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -5,13 +5,12 @@ import copy from typing import Union -import cftime import numpy as np import pandas as pd import xarray as xr from . 
import basic as tb_utils_gi -import datetime -import random, string +import random +import string def find_axis_from_dim_coord( @@ -30,8 +29,7 @@ def find_axis_from_dim_coord( Returns ------- axis_number: int - the number of the axis of the given coordinate, or None if the coordinate - is not found in the cube or not a dimensional coordinate + the number of the axis of the given coordinate Raises ------ @@ -57,8 +55,8 @@ def find_axis_from_dim_coord( return dim_axis if dim_axis is None and len(coord_axes) == 1: return coord_axes[0] - - return None + raise ValueError("Coordinate/Dimension " + dim_coord_name + " not found.") + # return None def find_axis_from_dim(in_da: xr.DataArray, dim_name: str) -> Union[int, None]: From 4d54d245d4947fcc49b4add7a4325b2a60a24b92 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 27 Feb 2024 20:45:08 -0600 Subject: [PATCH 26/32] Refactors to address @w-k-jones comments --- tobac/utils/internal/xarray_utils.py | 50 +++++++++++++++------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index e7a649a1..3f0cfe70 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -37,8 +37,10 @@ def find_axis_from_dim_coord( Returns ValueError if there are more than one matching dimension name or if the dimension/coordinate isn't found. """ - - dim_axis = find_axis_from_dim(in_da, dim_coord_name) + try: + dim_axis = find_axis_from_dim(in_da, dim_coord_name) + except ValueError: + dim_axis = None try: coord_axes = find_axis_from_coord(in_da, dim_coord_name) @@ -96,7 +98,7 @@ def find_axis_from_dim(in_da: xr.DataArray, dim_name: str) -> Union[int, None]: "More than one matching dimension. Need to specify which axis number or rename " "your dimensions." ) - return None + raise ValueError("Dimension not found. ") def find_axis_from_coord(in_da: xr.DataArray, coord_name: str) -> tuple[int]: @@ -135,18 +137,18 @@ def find_axis_from_coord(in_da: xr.DataArray, coord_name: str) -> tuple[int]: if len(all_matching_coords) > 1: raise ValueError("Too many matching coords") - return tuple() + raise ValueError("No matching coords") def find_vertical_coord_name( - variable_cube: xr.DataArray, + variable_da: xr.DataArray, vertical_coord: Union[str, None] = None, ) -> str: """Function to find the vertical coordinate in the iris cube Parameters ---------- - variable_cube: xarray.DataArray + variable_da: xarray.DataArray Input variable cube, containing a vertical coordinate. vertical_coord: str Vertical coordinate name. If None, this function tries to auto-detect. @@ -162,7 +164,7 @@ def find_vertical_coord_name( Raised if the vertical coordinate isn't found in the cube. 
""" - list_coord_names = variable_cube.coords + list_coord_names = variable_da.coords if vertical_coord is None or vertical_coord == "auto": # find the intersection @@ -347,14 +349,20 @@ def add_coordinates_to_features( hdim2_name_original = variable_da.dims[hdim2_axis] # generate random names for the new coordinates that are based on i, j, k values - hdim1_name_new = "".join( - random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) - for _ in range(16) - ) - hdim2_name_new = "".join( - random.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) - for _ in range(16) - ) + hdim1_name_new = "__temp_hdim1_name" + hdim2_name_new = "__temp_hdim2_name" + vdim_name_new = "__temp_vdim_name" + + if ( + hdim1_name_new in variable_da.dims + or hdim2_name_new in variable_da.dims + or vdim_name_new in variable_da.dims + ): + raise ValueError( + "Cannot have dimensions named {0}, {1}, or {2}".format( + hdim1_name_new, hdim2_name_new, vdim_name_new + ) + ) dim_new_names = { hdim1_name_original: hdim1_name_new, @@ -367,12 +375,6 @@ def add_coordinates_to_features( if is_3d: vdim_name_original = variable_da.dims[vertical_axis] - vdim_name_new = "".join( - random.choice( - string.ascii_uppercase + string.ascii_lowercase + string.digits - ) - for _ in range(16) - ) dim_interp_coords[vdim_name_new] = xr.DataArray( return_feat_df["vdim"].values, dims="features" ) @@ -383,9 +385,9 @@ def add_coordinates_to_features( # dataset renamed_dim_da = variable_da.swap_dims(dim_new_names) interpolated_df = renamed_dim_da.interp(coords=dim_interp_coords) - interpolated_df = interpolated_df.drop([hdim1_name_new, hdim2_name_new]) - if is_3d: - interpolated_df = interpolated_df.drop([vdim_name_new]) + interpolated_df = interpolated_df.drop_vars( + [hdim1_name_new, hdim2_name_new, vdim_name_new], errors="ignore" + ) return_feat_df[time_dim_name] = variable_da[time_dim_name].values[ return_feat_df["frame"] ] From d6469b1c643eb7948bb0ef39260ba324629961b4 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 12 Mar 2024 15:36:50 -0500 Subject: [PATCH 27/32] resolving merge issues --- tobac/analysis.py | 2 +- tobac/segmentation.py | 4 ++-- tobac/utils/general.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tobac/analysis.py b/tobac/analysis.py index afc11084..effbab6e 100644 --- a/tobac/analysis.py +++ b/tobac/analysis.py @@ -865,7 +865,7 @@ def calculate_areas_2Dlatlon(_2Dlat_coord, _2Dlon_coord): return area -@decorators.xarray_to_iris +@decorators.xarray_to_iris() def calculate_area(features, mask, method_area=None): """Calculate the area of the segments for each feature. 
diff --git a/tobac/segmentation.py b/tobac/segmentation.py index 30fb8f86..336c0f81 100644 --- a/tobac/segmentation.py +++ b/tobac/segmentation.py @@ -330,7 +330,7 @@ def segmentation_2D( ) -@decorators.xarray_to_iris +@decorators.xarray_to_iris() def segmentation_timestep( field_in: iris.cube.Cube, features_in: pd.DataFrame, @@ -1117,7 +1117,7 @@ def check_add_unseeded_across_bdrys( return markers_out -@decorators.xarray_to_iris +@decorators.xarray_to_iris() def segmentation( features: pd.DataFrame, field: iris.cube.Cube, diff --git a/tobac/utils/general.py b/tobac/utils/general.py index 6824991e..88482c17 100644 --- a/tobac/utils/general.py +++ b/tobac/utils/general.py @@ -163,7 +163,7 @@ def get_bounding_box(x, buffer=1): return bbox -@decorators.xarray_to_iris +@decorators.xarray_to_iris() def get_spacings( field_in, grid_spacing=None, time_spacing=None, average_method="arithmetic" ): From 0f4dcee076345cd858ee9f567bde3e9ae6d620c7 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 27 Mar 2024 10:02:59 -0500 Subject: [PATCH 28/32] fix for renamed functions --- tobac/analysis/spatial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tobac/analysis/spatial.py b/tobac/analysis/spatial.py index 8f1caa6d..de256261 100644 --- a/tobac/analysis/spatial.py +++ b/tobac/analysis/spatial.py @@ -11,7 +11,7 @@ from iris.analysis.cartography import area_weights from tobac.utils.bulk_statistics import get_statistics_from_mask -from tobac.utils.internal.basic import find_vertical_axis_from_coord +from tobac.utils.internal.basic import find_vertical_coord_name from tobac.utils import decorators __all__ = ( @@ -381,7 +381,7 @@ def calculate_area(features, mask, method_area=None, vertical_coord=None): mask_slice = next(mask.slices_over("time")) is_3d = len(mask_slice.core_data().shape) == 3 if is_3d: - vertical_coord_name = find_vertical_axis_from_coord(mask_slice, vertical_coord) + vertical_coord_name = find_vertical_coord_name(mask_slice, vertical_coord) # Need to get var_name as xarray uses this to label dims collapse_dim = mask_slice.coords(vertical_coord_name)[0].var_name else: From c54d5d5e23b6cf9d2a2633296af6407d9da7e2a5 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 9 May 2024 15:27:53 -0500 Subject: [PATCH 29/32] fixed from Julia's comments --- tobac/analysis.py | 0 tobac/utils/bulk_statistics.py | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) delete mode 100644 tobac/analysis.py diff --git a/tobac/analysis.py b/tobac/analysis.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tobac/utils/bulk_statistics.py b/tobac/utils/bulk_statistics.py index acab4241..cb9ce0cf 100644 --- a/tobac/utils/bulk_statistics.py +++ b/tobac/utils/bulk_statistics.py @@ -24,7 +24,7 @@ def get_statistics( features: pd.DataFrame, - labels: np.ndarray, + labels: np.ndarray[int], *fields: tuple[np.ndarray], statistic: dict[str, Union[Callable, tuple[Callable, dict]]] = { "ncells": np.count_nonzero From c07b1d55e6b4e5bc5cba7c17871b509e08da4513 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Wed, 17 Jul 2024 15:36:28 -0500 Subject: [PATCH 30/32] fix preserve_iris_datetime_types for most cases. 
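
This fix converts the numpy datetime64 values produced on the xarray code path back to cftime.DatetimeGregorian when the input came from iris and preserve_iris_datetime_types is set, so both code paths return the same time type. A rough sketch of that conversion, assuming cftime and pandas are available (to_cftime_gregorian is an illustrative helper, not part of this patch):

    import cftime
    import pandas as pd

    def to_cftime_gregorian(times: pd.Series) -> pd.Series:
        # rebuild each pandas timestamp as a cftime.DatetimeGregorian, the type
        # iris typically attaches to time coordinates
        return times.apply(
            lambda x: cftime.DatetimeGregorian(
                x.year, x.month, x.day, x.hour, x.minute, x.second, x.microsecond
            )
        )

    times = pd.Series(pd.to_datetime(["2000-01-01 00:30:00"]))
    print(type(to_cftime_gregorian(times)[0]))  # cftime.DatetimeGregorian
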
--- tobac/tests/test_iris_xarray_match_utils.py | 28 +++++++++++++++++++++ tobac/tests/test_xarray_utils.py | 11 ++++++++ tobac/utils/internal/xarray_utils.py | 15 ++++++----- 3 files changed, 48 insertions(+), 6 deletions(-) diff --git a/tobac/tests/test_iris_xarray_match_utils.py b/tobac/tests/test_iris_xarray_match_utils.py index 23026da8..36db545d 100644 --- a/tobac/tests/test_iris_xarray_match_utils.py +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -4,6 +4,7 @@ import copy import datetime +import iris.cube import numpy as np import pandas as pd import xarray as xr @@ -191,3 +192,30 @@ def test_add_coordinates_xarray_std_names( copy.deepcopy(all_feats), da_with_coords ) pd.testing.assert_frame_equal(iris_coord_interp, xr_coord_interp) + + +def test_preserve_iris_datetime_types(): + """ + Test that xarray.add_coordinates_to_features correctly returns the same time types as + iris when preserve_iris_datetime_types = True. + """ + + all_feats = tbtest.generate_single_feature( + 0, + 0, + feature_num=1, + max_h1=10, + max_h2=10, + ) + var_array: iris.cube.Cube = tbtest.make_simple_sample_data_2D(data_type="iris") + + xarray_output = xr_utils.add_coordinates_to_features( + all_feats, xr.DataArray.from_iris(var_array), preserve_iris_datetime_types=True + ) + iris_output = iris_utils.add_coordinates(all_feats, var_array) + + pd.testing.assert_frame_equal(xarray_output, iris_output) + assert xarray_output["time"].values[0] == iris_output["time"].values[0] + assert isinstance( + xarray_output["time"].values[0], type(iris_output["time"].values[0]) + ) diff --git a/tobac/tests/test_xarray_utils.py b/tobac/tests/test_xarray_utils.py index a73ca31c..a0a9ec9e 100644 --- a/tobac/tests/test_xarray_utils.py +++ b/tobac/tests/test_xarray_utils.py @@ -139,3 +139,14 @@ def test_find_axis_from_dim_coord( assert out_val == expected_out else: assert out_val is None + + +def test_preserve_iris_datetime_types(): + """ + Tests that xarray_utils.add_coordinates_to_features correctly preserves the Iris Datetime + types when processing coordinates through xarray. + Returns + ------- + + """ + pass diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index ce138679..9ceab42e 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -415,11 +415,14 @@ def add_coordinates_to_features( return_feat_df[interp_coord_name] = interpolated_df[interp_coord].values if preserve_iris_datetime_types: - import cftime - - return_feat_df[time_dim_name] = return_feat_df[time_dim_name].apply( - lambda x: cftime.datetime( - x.year, x.month, x.day, x.hour, x.minute, x.second, x.microsecond + # We should only need to switch from datetime to DatetimeGregorian. + # if the datetime type is anything else, it stays as the original type. 
+ if "datetime64" in str(variable_da[time_dim_name].dtype): + import cftime + + return_feat_df[time_dim_name] = return_feat_df[time_dim_name].apply( + lambda x: cftime.DatetimeGregorian( + x.year, x.month, x.day, x.hour, x.minute, x.second, x.microsecond + ) ) - ) return return_feat_df From cd19a20cdbbe8cc6dc5d91f37c50f435033637c7 Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Thu, 18 Jul 2024 12:17:13 -0500 Subject: [PATCH 31/32] updated add_coordinates_to_features to work with coordinates that vary with time --- tobac/tests/test_xarray_utils.py | 69 ++++++++++++++++++++++++---- tobac/utils/internal/xarray_utils.py | 3 ++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/tobac/tests/test_xarray_utils.py b/tobac/tests/test_xarray_utils.py index a0a9ec9e..d34e4f1e 100644 --- a/tobac/tests/test_xarray_utils.py +++ b/tobac/tests/test_xarray_utils.py @@ -8,6 +8,8 @@ import xarray as xr import tobac.utils.internal.xarray_utils as xr_utils +import tobac.testing as tbtest +import datetime @pytest.mark.parametrize( @@ -141,12 +143,63 @@ def test_find_axis_from_dim_coord( assert out_val is None -def test_preserve_iris_datetime_types(): - """ - Tests that xarray_utils.add_coordinates_to_features correctly preserves the Iris Datetime - types when processing coordinates through xarray. - Returns - ------- +@pytest.mark.parametrize( + "dim_names, coord_dim_map, feature_pos, expected_vals", + [ + ( + ["time", "x", "y"], + { + "test_coord1": (tuple(), 1), + "test_coord_time": ("time", [5, 6, 7, 8, 9, 10]), + }, + (1, 1), + {"test_coord1": (1, 1, 1), "test_coord_time": (5, 6, 7)}, + ), + ], +) +def test_add_coordinates_to_features_interpolate_along_other_dims( + dim_names: tuple[str], + coord_dim_map: dict, + feature_pos: tuple[int], + expected_vals: dict[str, tuple], +): + time_len: int = 6 + if len(feature_pos) == 2: + all_feats = tbtest.generate_single_feature( + feature_pos[0], + feature_pos[1], + feature_num=1, + num_frames=3, + max_h1=100, + max_h2=100, + ) + arr_size = (time_len, 5, 5) - """ - pass + elif len(feature_pos) == 3: + all_feats = tbtest.generate_single_feature( + feature_pos[1], + feature_pos[2], + start_v=feature_pos[0], + feature_num=1, + num_frames=3, + max_h1=100, + max_h2=100, + ) + arr_size = (time_len, 1, 5, 5) + else: + raise ValueError("too many dimensions") + coord_dim_map["time"] = ( + ("time",), + [ + datetime.datetime(2000, 1, 1, 0) + datetime.timedelta(hours=x) + for x in range(time_len) + ], + ) + + test_xr_arr = xr.DataArray(np.empty(arr_size), dims=dim_names, coords=coord_dim_map) + + resulting_df = xr_utils.add_coordinates_to_features(all_feats, test_xr_arr) + for coord in coord_dim_map: + assert coord in resulting_df + if coord != "time": + assert np.all(resulting_df[coord].values == expected_vals[coord]) diff --git a/tobac/utils/internal/xarray_utils.py b/tobac/utils/internal/xarray_utils.py index 9ceab42e..d865d80a 100644 --- a/tobac/utils/internal/xarray_utils.py +++ b/tobac/utils/internal/xarray_utils.py @@ -353,6 +353,7 @@ def add_coordinates_to_features( hdim1_name_new = "__temp_hdim1_name" hdim2_name_new = "__temp_hdim2_name" vdim_name_new = "__temp_vdim_name" + time_name_new = "__temp_time_name" if ( hdim1_name_new in variable_da.dims @@ -368,10 +369,12 @@ def add_coordinates_to_features( dim_new_names = { hdim1_name_original: hdim1_name_new, hdim2_name_original: hdim2_name_new, + time_dim_name: time_name_new, } dim_interp_coords = { hdim1_name_new: xr.DataArray(return_feat_df["hdim_1"].values, dims="features"), hdim2_name_new: 
xr.DataArray(return_feat_df["hdim_2"].values, dims="features"), + time_name_new: xr.DataArray(return_feat_df["frame"].values, dims="features"), } if is_3d: From 7f6930b0d918892eb7f0df8f0baaf8ceb5beff7a Mon Sep 17 00:00:00 2001 From: Sean Freeman Date: Tue, 23 Jul 2024 09:37:25 -0500 Subject: [PATCH 32/32] add keyword for preserve_iris_datetime_types to feature detection --- tobac/feature_detection.py | 12 ++++++++++-- tobac/tests/test_iris_xarray_match_utils.py | 8 ++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/tobac/feature_detection.py b/tobac/feature_detection.py index 10f96756..022d4dc5 100644 --- a/tobac/feature_detection.py +++ b/tobac/feature_detection.py @@ -1156,6 +1156,7 @@ def feature_detection_multithreshold( dz: Union[float, None] = None, strict_thresholding: bool = False, statistic: Union[dict[str, Union[Callable, tuple[Callable, dict]]], None] = None, + preserve_iris_datetime_types: bool = True, **kwargs, ) -> pd.DataFrame: """Perform feature detection based on contiguous regions. @@ -1237,6 +1238,11 @@ def feature_detection_multithreshold( If True, a feature can only be detected if all previous thresholds have been met. Default is False. + preserve_iris_datetime_types: bool, optional, default: True + If True, for iris input, preserve the original datetime type (typically + `cftime.DatetimeGregorian`) where possible. For xarray input, this parameter has no + effect. + Returns ------- features : pandas.DataFrame @@ -1403,13 +1409,15 @@ def feature_detection_multithreshold( features, field_in, vertical_coord=vertical_coord, - preserve_iris_datetime_types=kwargs["converted_from_iris"], + preserve_iris_datetime_types=kwargs["converted_from_iris"] + & preserve_iris_datetime_types, ) else: features = add_coordinates( features, field_in, - preserve_iris_datetime_types=kwargs["converted_from_iris"], + preserve_iris_datetime_types=kwargs["converted_from_iris"] + & preserve_iris_datetime_types, ) else: features = None diff --git a/tobac/tests/test_iris_xarray_match_utils.py b/tobac/tests/test_iris_xarray_match_utils.py index 36db545d..1baf6182 100644 --- a/tobac/tests/test_iris_xarray_match_utils.py +++ b/tobac/tests/test_iris_xarray_match_utils.py @@ -219,3 +219,11 @@ def test_preserve_iris_datetime_types(): assert isinstance( xarray_output["time"].values[0], type(iris_output["time"].values[0]) ) + + xarray_output_datetime_preserve_off = xr_utils.add_coordinates_to_features( + all_feats, xr.DataArray.from_iris(var_array), preserve_iris_datetime_types=False + ) + + assert isinstance( + xarray_output_datetime_preserve_off["time"].values[0], np.datetime64 + )
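
Taken together, the series keeps cftime-style times for iris input by default while xarray input continues to return datetime64 values, and this final commit exposes that choice directly on feature detection. A usage sketch under the assumption that this patch series is installed (exact feature counts and times depend on the sample data):

    import tobac.feature_detection
    import tobac.testing
    import tobac.utils

    cube = tobac.testing.make_simple_sample_data_2D(data_type="iris")
    dxy, _ = tobac.utils.get_spacings(cube)

    # default: iris input keeps cftime-style times in the output DataFrame
    features_cftime = tobac.feature_detection.feature_detection_multithreshold(
        cube, dxy, threshold=1.0
    )
    # opt out: times should come back as numpy/pandas datetimes instead
    features_np = tobac.feature_detection.feature_detection_multithreshold(
        cube, dxy, threshold=1.0, preserve_iris_datetime_types=False
    )
    print(type(features_cftime["time"].iloc[0]))
    print(type(features_np["time"].iloc[0]))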