From d34cf4620a8127b9ae7d35210bb6d768e93b03ef Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 30 Jan 2025 10:07:15 +0100 Subject: [PATCH 1/7] add time and date type transformations --- .../datacube_type_change.py | 45 ++++++++++++- tests/test_date_time_unmerged.py | 67 +++++++++++++++++++ 2 files changed, 111 insertions(+), 1 deletion(-) create mode 100644 tests/test_date_time_unmerged.py diff --git a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py index 5e395c95..2ae05e89 100644 --- a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py +++ b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py @@ -1,5 +1,6 @@ from copy import deepcopy from importlib import import_module +import pandas as pd from ..datacube_transformations import DatacubeAxisTransformation @@ -75,4 +76,46 @@ def make_str(self, value): return tuple(values) -_type_to_datacube_type_change_lookup = {"int": "TypeChangeStrToInt"} +class TypeChangeStrToTimestamp(DatacubeAxisTypeChange): + def __init__(self, axis_name, new_type): + self.axis_name = axis_name + self._new_type = new_type + + def transform_type(self, value): + try: + return pd.Timestamp(value) + except ValueError: + return None + + def make_str(self, value): + values = [] + for val in value: + values.append(val.strftime('%Y%m%d')) + return tuple(values) + + +class TypeChangeStrToTimedelta(DatacubeAxisTypeChange): + def __init__(self, axis_name, new_type): + self.axis_name = axis_name + self._new_type = new_type + + def transform_type(self, value): + try: + hours = int(value[:2]) + mins = int(value[2:]) + return pd.Timedelta(hours=hours, minutes=mins) + except ValueError: + return None + + def make_str(self, value): + values = [] + for val in value: + hours = int(val.total_seconds() // 3600) + mins = int((val.total_seconds() % 3600) // 60) + values.append(f"{hours:02d}{mins:02d}") + return tuple(values) + + +_type_to_datacube_type_change_lookup = {"int": "TypeChangeStrToInt", + "date": "TypeChangeStrToTimestamp", + "time": "TypeChangeStrToTimedelta"} diff --git a/tests/test_date_time_unmerged.py b/tests/test_date_time_unmerged.py new file mode 100644 index 00000000..4d5faa23 --- /dev/null +++ b/tests/test_date_time_unmerged.py @@ -0,0 +1,67 @@ +import pandas as pd +import pytest + +from polytope_feature.polytope import Polytope, Request +from polytope_feature.shapes import Box, Select + + +class TestSlicingFDBDatacube: + def setup_method(self, method): + # Create a dataarray with 3 labelled axes using different index types + self.options = { + "axis_config": [ + {"axis_name": "step", "transformations": [{"name": "type_change", "type": "int"}]}, + {"axis_name": "date", "transformations": [{"name": "type_change", "type": "date"}]}, + {"axis_name": "time", "transformations": [{"name": "type_change", "type": "time"}]}, + { + "axis_name": "values", + "transformations": [ + {"name": "mapper", "type": "octahedral", "resolution": 1280, "axes": ["latitude", "longitude"]} + ], + }, + {"axis_name": "latitude", "transformations": [{"name": "reverse", "is_reverse": True}]}, + {"axis_name": "longitude", "transformations": [{"name": "cyclic", "range": [0, 360]}]}, + ], + "pre_path": {"class": "od", "expver": "0001", "levtype": "sfc", "stream": "oper", "type": "fc"}, + "compressed_axes_config": [ + "longitude", + "latitude", + "levtype", + "step", + "date", + "domain", + "expver", + 
"param", + "class", + "stream", + "type", + ], + } + + # Testing different shapes + @pytest.mark.fdb + def test_fdb_datacube(self): + import pygribjump as gj + + request = Request( + Select("step", [0]), + Select("levtype", ["sfc"]), + Select("date", [pd.Timestamp("20240118")]), + Select("time", [pd.Timedelta("00:00:00")]), + Select("domain", ["g"]), + Select("expver", ["0001"]), + Select("param", ["167"]), + Select("class", ["od"]), + Select("stream", ["oper"]), + Select("type", ["fc"]), + Box(["latitude", "longitude"], [0, 0], [0.2, 0.2]), + ) + self.fdbdatacube = gj.GribJump() + self.API = Polytope( + datacube=self.fdbdatacube, + options=self.options, + ) + result = self.API.retrieve(request) + result.pprint() + assert len(result.leaves) == 3 + assert len(result.leaves[0].result) == 3 From a57bcc1c0d34468ad112780c5fb6bbbad78f5361 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 30 Jan 2025 13:18:41 +0100 Subject: [PATCH 2/7] isort --- .../transformations/datacube_type_change/datacube_type_change.py | 1 + 1 file changed, 1 insertion(+) diff --git a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py index 2ae05e89..876cb84c 100644 --- a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py +++ b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py @@ -1,5 +1,6 @@ from copy import deepcopy from importlib import import_module + import pandas as pd from ..datacube_transformations import DatacubeAxisTransformation From e60ffe81b2afd6b7397560e1c25713304f86f51d Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 30 Jan 2025 13:21:07 +0100 Subject: [PATCH 3/7] black --- .../datacube_type_change/datacube_type_change.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py index 876cb84c..7ea518c0 100644 --- a/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py +++ b/polytope_feature/datacube/transformations/datacube_type_change/datacube_type_change.py @@ -91,7 +91,7 @@ def transform_type(self, value): def make_str(self, value): values = [] for val in value: - values.append(val.strftime('%Y%m%d')) + values.append(val.strftime("%Y%m%d")) return tuple(values) @@ -117,6 +117,8 @@ def make_str(self, value): return tuple(values) -_type_to_datacube_type_change_lookup = {"int": "TypeChangeStrToInt", - "date": "TypeChangeStrToTimestamp", - "time": "TypeChangeStrToTimedelta"} +_type_to_datacube_type_change_lookup = { + "int": "TypeChangeStrToInt", + "date": "TypeChangeStrToTimestamp", + "time": "TypeChangeStrToTimedelta", +} From 7f1ae000de4f92933aad91738860d0974e7b4e97 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 30 Jan 2025 14:25:24 +0100 Subject: [PATCH 4/7] try to fix unsliceable shape error --- polytope_feature/datacube/datacube_axis.py | 32 ++++++++++++++++++---- tests/test_date_time_unmerged.py | 6 ++-- 2 files changed, 31 insertions(+), 7 deletions(-) diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index bf4dd7b8..2fbf64b4 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -2,6 +2,7 @@ from abc import ABC, abstractmethod from copy import deepcopy from typing import 
Any, List +import xarray as xr import numpy as np import pandas as pd @@ -144,18 +145,36 @@ def find_indices_between(self, indexes_ranges, low, up, datacube, method=None): @staticmethod def create_standard(name, values, datacube): - values = np.array(values) - DatacubeAxis.check_axis_type(name, values) - if datacube._axes is None: - datacube._axes = {name: deepcopy(_type_to_axis_lookup[values.dtype.type])} + print(name) + print(values) + print(type(values[0])) + if type(values[0]) == xr.core.variable.Variable: + values = np.array(values) + DatacubeAxis.check_axis_type_xr(name, values) + if datacube._axes is None: + datacube._axes = {name: deepcopy(_type_to_axis_lookup[values.dtype.type])} + else: + datacube._axes[name] = deepcopy(_type_to_axis_lookup[values.dtype.type]) else: - datacube._axes[name] = deepcopy(_type_to_axis_lookup[values.dtype.type]) + DatacubeAxis.check_axis_type(name, values) + if datacube._axes is None: + datacube._axes = {name: deepcopy(_type_to_axis_lookup[type(values[0])])} + else: + datacube._axes[name] = deepcopy(_type_to_axis_lookup[type(values[0])]) datacube._axes[name].name = name datacube.axis_counter += 1 @staticmethod def check_axis_type(name, values): # NOTE: The values here need to be a numpy array which has a dtype attribute + # if values.dtype.type not in _type_to_axis_lookup: + if type(values[0]) not in _type_to_axis_lookup: + raise ValueError(f"Could not create a mapper for index type {type(values[0])} for axis {name}") + + @staticmethod + def check_axis_type_xr(name, values): + # NOTE: The values here need to be a numpy array which has a dtype attribute + # if values.dtype.type not in _type_to_axis_lookup: if values.dtype.type not in _type_to_axis_lookup: raise ValueError(f"Could not create a mapper for index type {values.dtype.type} for axis {name}") @@ -302,10 +321,13 @@ def serialize(self, value): np.int64: IntDatacubeAxis(), np.datetime64: PandasTimestampDatacubeAxis(), np.timedelta64: PandasTimedeltaDatacubeAxis(), + pd.Timedelta: PandasTimedeltaDatacubeAxis(), np.float64: FloatDatacubeAxis(), np.float32: FloatDatacubeAxis(), np.int32: IntDatacubeAxis(), np.str_: UnsliceableDatacubeAxis(), str: UnsliceableDatacubeAxis(), np.object_: UnsliceableDatacubeAxis(), + int: IntDatacubeAxis(), + float: FloatDatacubeAxis(), } diff --git a/tests/test_date_time_unmerged.py b/tests/test_date_time_unmerged.py index 4d5faa23..f675a354 100644 --- a/tests/test_date_time_unmerged.py +++ b/tests/test_date_time_unmerged.py @@ -2,7 +2,7 @@ import pytest from polytope_feature.polytope import Polytope, Request -from polytope_feature.shapes import Box, Select +from polytope_feature.shapes import Box, Select, Span class TestSlicingFDBDatacube: @@ -46,8 +46,10 @@ def test_fdb_datacube(self): request = Request( Select("step", [0]), Select("levtype", ["sfc"]), - Select("date", [pd.Timestamp("20240118")]), + # Select("date", [pd.Timestamp("20240118")]), Select("time", [pd.Timedelta("00:00:00")]), + # Span("time", [pd.Timedelta("00:00:00")]), + Span("date", pd.Timestamp("20240118"), pd.Timestamp("20240119")), Select("domain", ["g"]), Select("expver", ["0001"]), Select("param", ["167"]), From a6f894368c344b2cab374d9c28db079384335233 Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 30 Jan 2025 14:29:02 +0100 Subject: [PATCH 5/7] fix last tests --- polytope_feature/datacube/datacube_axis.py | 29 ++++++++++++++-------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index 
2fbf64b4..03b06447 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -145,22 +145,31 @@ def find_indices_between(self, indexes_ranges, low, up, datacube, method=None): @staticmethod def create_standard(name, values, datacube): - print(name) - print(values) - print(type(values[0])) - if type(values[0]) == xr.core.variable.Variable: + # print(name) + # print(values) + # print(type(values[0])) + try: + if type(values[0]) == xr.core.variable.Variable: + values = np.array(values) + DatacubeAxis.check_axis_type_xr(name, values) + if datacube._axes is None: + datacube._axes = {name: deepcopy(_type_to_axis_lookup[values.dtype.type])} + else: + datacube._axes[name] = deepcopy(_type_to_axis_lookup[values.dtype.type]) + else: + DatacubeAxis.check_axis_type(name, values) + if datacube._axes is None: + datacube._axes = {name: deepcopy(_type_to_axis_lookup[type(values[0])])} + else: + datacube._axes[name] = deepcopy(_type_to_axis_lookup[type(values[0])]) + except IndexError: values = np.array(values) DatacubeAxis.check_axis_type_xr(name, values) if datacube._axes is None: datacube._axes = {name: deepcopy(_type_to_axis_lookup[values.dtype.type])} else: datacube._axes[name] = deepcopy(_type_to_axis_lookup[values.dtype.type]) - else: - DatacubeAxis.check_axis_type(name, values) - if datacube._axes is None: - datacube._axes = {name: deepcopy(_type_to_axis_lookup[type(values[0])])} - else: - datacube._axes[name] = deepcopy(_type_to_axis_lookup[type(values[0])]) + datacube._axes[name].name = name datacube.axis_counter += 1 From 47b406392a2f07b2dbf1209e3745836cc3214efe Mon Sep 17 00:00:00 2001 From: mathleur Date: Thu, 30 Jan 2025 15:24:20 +0100 Subject: [PATCH 6/7] uniform handling of axes creation for all types --- polytope_feature/datacube/datacube_axis.py | 62 +++++++++------------- 1 file changed, 26 insertions(+), 36 deletions(-) diff --git a/polytope_feature/datacube/datacube_axis.py b/polytope_feature/datacube/datacube_axis.py index 03b06447..fbf8a70c 100644 --- a/polytope_feature/datacube/datacube_axis.py +++ b/polytope_feature/datacube/datacube_axis.py @@ -2,10 +2,10 @@ from abc import ABC, abstractmethod from copy import deepcopy from typing import Any, List -import xarray as xr import numpy as np import pandas as pd +import xarray as xr from .transformations.datacube_cyclic.datacube_cyclic import DatacubeAxisCyclic from .transformations.datacube_mappers.datacube_mappers import DatacubeMapper @@ -144,48 +144,38 @@ def find_indices_between(self, indexes_ranges, low, up, datacube, method=None): return indexes_between_ranges @staticmethod - def create_standard(name, values, datacube): - # print(name) - # print(values) - # print(type(values[0])) - try: - if type(values[0]) == xr.core.variable.Variable: - values = np.array(values) - DatacubeAxis.check_axis_type_xr(name, values) - if datacube._axes is None: - datacube._axes = {name: deepcopy(_type_to_axis_lookup[values.dtype.type])} - else: - datacube._axes[name] = deepcopy(_type_to_axis_lookup[values.dtype.type]) - else: - DatacubeAxis.check_axis_type(name, values) - if datacube._axes is None: - datacube._axes = {name: deepcopy(_type_to_axis_lookup[type(values[0])])} - else: - datacube._axes[name] = deepcopy(_type_to_axis_lookup[type(values[0])]) - except IndexError: + def values_type(values): + type_ = None + if isinstance(values, xr.core.variable.IndexVariable) or isinstance(values, xr.core.variable.Variable): + # If we have some xarray variable, transform them to actual variable type values = 
np.array(values) - DatacubeAxis.check_axis_type_xr(name, values) - if datacube._axes is None: - datacube._axes = {name: deepcopy(_type_to_axis_lookup[values.dtype.type])} + type_ = values.dtype.type + else: + if len(values) == 0: + # If we have no values (newly created axis), default to a float + values = np.array(values) + type_ = values.dtype.type else: - datacube._axes[name] = deepcopy(_type_to_axis_lookup[values.dtype.type]) + type_ = type(values[0]) + return type_ + + @staticmethod + def create_standard(name, values, datacube): + val_type = DatacubeAxis.values_type(values) + + DatacubeAxis.check_axis_type(name, val_type) + if datacube._axes is None: + datacube._axes = {name: deepcopy(_type_to_axis_lookup[val_type])} + else: + datacube._axes[name] = deepcopy(_type_to_axis_lookup[val_type]) datacube._axes[name].name = name datacube.axis_counter += 1 @staticmethod - def check_axis_type(name, values): - # NOTE: The values here need to be a numpy array which has a dtype attribute - # if values.dtype.type not in _type_to_axis_lookup: - if type(values[0]) not in _type_to_axis_lookup: - raise ValueError(f"Could not create a mapper for index type {type(values[0])} for axis {name}") - - @staticmethod - def check_axis_type_xr(name, values): - # NOTE: The values here need to be a numpy array which has a dtype attribute - # if values.dtype.type not in _type_to_axis_lookup: - if values.dtype.type not in _type_to_axis_lookup: - raise ValueError(f"Could not create a mapper for index type {values.dtype.type} for axis {name}") + def check_axis_type(name, val_type): + if val_type not in _type_to_axis_lookup: + raise ValueError(f"Could not create a mapper for index type {val_type} for axis {name}") transformations_order = [ From ff0110dc5224b356f261f09e2b9ce164a0281773 Mon Sep 17 00:00:00 2001 From: mathleur Date: Mon, 3 Feb 2025 16:30:09 +0100 Subject: [PATCH 7/7] update version --- polytope_feature/version.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polytope_feature/version.py b/polytope_feature/version.py index 9b719b6e..8c79b2ef 100644 --- a/polytope_feature/version.py +++ b/polytope_feature/version.py @@ -1 +1 @@ -__version__ = "1.0.25" +__version__ = "1.0.26"
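
For reference, the conversions introduced in patch 1 round-trip between the raw string indices and pandas objects as sketched below. This is a minimal standalone mirror of the same logic; the helper names are illustrative and not part of the polytope API.

import pandas as pd

# Standalone mirror of TypeChangeStrToTimestamp / TypeChangeStrToTimedelta
# from patch 1; the function names here are illustrative only.

def str_to_timestamp(value):
    # "20240118" -> Timestamp("2024-01-18 00:00:00"); None if unparseable
    try:
        return pd.Timestamp(value)
    except ValueError:
        return None

def timestamps_to_str(values):
    # Back to the YYYYMMDD strings stored in the datacube, as in make_str
    return tuple(val.strftime("%Y%m%d") for val in values)

def str_to_timedelta(value):
    # "0600" -> Timedelta(hours=6): the first two characters are hours,
    # the rest are minutes, matching transform_type in the patch
    try:
        return pd.Timedelta(hours=int(value[:2]), minutes=int(value[2:]))
    except ValueError:
        return None

def timedeltas_to_str(values):
    # Back to zero-padded HHMM strings, as in make_str
    return tuple(
        f"{int(v.total_seconds() // 3600):02d}{int((v.total_seconds() % 3600) // 60):02d}"
        for v in values
    )

ts = str_to_timestamp("20240118")
td = str_to_timedelta("0600")
print(ts, td)                                            # 2024-01-18 00:00:00  0 days 06:00:00
print(timestamps_to_str([ts]), timedeltas_to_str([td]))  # ('20240118',) ('0600',)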
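
Patch 6 folds the per-case branches and try/except of patches 4 and 5 into a single values_type helper that decides which axis class to instantiate. A condensed sketch of that dispatch, written outside the DatacubeAxis class and assuming only numpy and xarray are available:

import numpy as np
import xarray as xr

def values_type(values):
    # xarray (Index)Variables are materialised to numpy so their dtype
    # drives the lookup; an empty axis falls back to numpy's default
    # float dtype; otherwise the Python type of the first element decides.
    if isinstance(values, (xr.core.variable.IndexVariable, xr.core.variable.Variable)):
        return np.asarray(values).dtype.type
    if len(values) == 0:
        return np.asarray(values).dtype.type
    return type(values[0])

print(values_type([1, 2, 3]))   # <class 'int'>            -> IntDatacubeAxis
print(values_type([]))          # <class 'numpy.float64'>  -> FloatDatacubeAxis
print(values_type(["a", "b"]))  # <class 'str'>            -> UnsliceableDatacubeAxis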