class TabularTimeSeries(Protocol):
    """
    Structural type for the objects TabularTimeSeriesUtils can merge: anything that can duplicate itself via `copy`.
    """

    def copy(self, deep: bool = False) -> Self:
        """
        Duplicate a model
        Args:
            deep: set to `True` to make a deep copy of the model

        Returns: new model instance

        """
        ...


ObjectWithTimeSeries = TypeVar("ObjectWithTimeSeries", bound=TabularTimeSeries)


class TabularTimeSeriesUtils:
    """
    Utility functions for objects containing TimeSeries
    """

    @classmethod
    def merge(cls, *objects_with_time_series: ObjectWithTimeSeries) -> ObjectWithTimeSeries:
        """
        Merge objects containing TimeSeries. Other attributes will be copied from the first object.

        The result starts as a deep copy of the first object. For every attribute found in the first object's
        `__dict__`, the attribute named `timesteps` becomes the sorted concatenation of all objects' timesteps
        (no de-duplication is done here), and every attribute whose value is a TimeSeries is merged with the
        corresponding attribute of each other object through `TimeSeries.merge`. Everything else is left as
        copied from the first object.

        Args:
            *objects_with_time_series: list of objects to merge, all of exactly the same type

        Returns: a merged object of the same type

        Raises:
            ValueError: if no objects are given or if the objects are not all of the same type. Errors raised
                by `TimeSeries.merge` for individual attributes propagate unchanged.

        """
        # An empty call used to fall through to the type check below and report "differing types", which hid
        # the real problem from the caller.
        if not objects_with_time_series:
            raise ValueError("Can not merge an empty collection of objects.")

        # Verify that we are merging the same types
        if len({type(object_with_time_series) for object_with_time_series in objects_with_time_series}) != 1:
            raise ValueError("Can not merge objects of differing types.")

        first, *others = objects_with_time_series
        merged_object = first.copy(deep=True)

        for key, value in first.__dict__.items():
            for other in others:
                # Read the value accumulated so far, so that a third, fourth, ... object is merged into the
                # result of the previous merges and not into the first object's original value.
                accumulated_value = getattr(merged_object, key)
                other_value = getattr(other, key)
                if key == "timesteps":
                    setattr(merged_object, key, sorted(itertools.chain(accumulated_value, other_value)))
                elif isinstance(value, TimeSeries):
                    setattr(merged_object, key, accumulated_value.merge(other_value))

        return merged_object
def merge(self, other: TimeSeries) -> Self:
    """
    Merge two TimeSeries with differing timesteps
    Args:
        other: the TimeSeries to merge with this one; it must be an instance of this object's type, have the
            same unit and share no timesteps with this TimeSeries

    Returns: a new instance of this object's class holding the timesteps of both inputs in sorted order, each
        paired with the value from the TimeSeries that timestep came from, and this object's unit

    Raises:
        ValueError: if `other` is not an instance of this object's type, if the units differ, or if the two
            TimeSeries have any timestep in common

    """
    if not isinstance(other, type(self)):
        raise ValueError(f"Can not merge {type(self)} with {type(other)}")

    if self.unit != other.unit:
        raise ValueError(f"Mismatching units: '{self.unit}' != '{other.unit}'")

    # Map every timestep to the position of its first occurrence. This gives the same position that
    # `list.index` would return, but makes each lookup below O(1) instead of scanning the list again for
    # every merged timestep.
    own_positions = {}
    for position, timestep in enumerate(self.timesteps):
        own_positions.setdefault(timestep, position)

    other_positions = {}
    for position, timestep in enumerate(other.timesteps):
        other_positions.setdefault(timestep, position)

    if not own_positions.keys().isdisjoint(other_positions):
        raise ValueError("Can not merge two TimeSeries with common timesteps")

    merged_timesteps = sorted(itertools.chain(self.timesteps, other.timesteps))
    merged_values = [
        self.values[own_positions[timestep]]
        if timestep in own_positions
        else other.values[other_positions[timestep]]
        for timestep in merged_timesteps
    ]

    return self.__class__(
        timesteps=merged_timesteps,
        values=merged_values,
        unit=self.unit,
    )
def merge(self, other: TimeSeries) -> TimeSeriesRate:
    """
    Merge two TimeSeries with differing timesteps
    Args:
        other: the TimeSeriesRate to merge with this one; it must have the same unit and rate type as this
            one and share no timesteps with it

    Returns: a new instance of this object's class holding the timesteps of both inputs in sorted order, each
        paired with the value and regularity from the TimeSeriesRate that timestep came from

    Raises:
        ValueError: if `other` is not a TimeSeriesRate, if the units or rate types differ, or if the two
            TimeSeries have any timestep in common

    """

    if not isinstance(other, TimeSeriesRate):
        raise ValueError(f"Can not merge {type(self)} with {type(other)}")

    if self.unit != other.unit:
        raise ValueError(f"Mismatching units: '{self.unit}' != '{other.unit}'")

    if not self.rate_type == other.rate_type:
        raise ValueError(
            "Mismatching rate type. Currently you can not merge stream/calendar day rates with calendar/stream day rates."
        )

    # Map every timestep to the position of its first occurrence. This gives the same position that
    # `list.index` would return, but makes each lookup below O(1) instead of scanning the list again for
    # every merged timestep.
    own_positions = {}
    for position, timestep in enumerate(self.timesteps):
        own_positions.setdefault(timestep, position)

    other_positions = {}
    for position, timestep in enumerate(other.timesteps):
        other_positions.setdefault(timestep, position)

    if not own_positions.keys().isdisjoint(other_positions):
        raise ValueError("Can not merge two TimeSeries with common timesteps")

    merged_timesteps = sorted(itertools.chain(self.timesteps, other.timesteps))
    merged_values = []
    merged_regularity = []

    for timestep in merged_timesteps:
        # The timesteps are disjoint, so every merged timestep belongs to exactly one of the two series.
        if timestep in own_positions:
            source, position = self, own_positions[timestep]
        else:
            source, position = other, other_positions[timestep]

        merged_values.append(source.values[position])
        if source.regularity is not None:
            merged_regularity.append(source.regularity[position])
        else:
            # The merged series always carries a regularity list, so a source without one contributes 1 for
            # each of its timesteps.
            # NOTE(review): presumably 1 matches how a missing regularity is treated elsewhere - confirm.
            merged_regularity.append(1)

    return self.__class__(
        timesteps=merged_timesteps,
        values=merged_values,
        regularity=merged_regularity,
        unit=self.unit,
        rate_type=self.rate_type,
    )
def merge(self, *other_results: Self) -> Self:
    """
    Merge all attributes of TimeSeries type, while also making sure the other attributes can be merged (i.e. id should be equal).
    Args:
        *other_results: results to merge into this one; each must have the same id as this result

    Returns: the merged result produced by `TabularTimeSeriesUtils.merge` for this result and `other_results`

    Raises:
        ValueError: if any of `other_results` has an id that differs from this result's id

    """
    # Verify that we are merging the same entity. This result's own id must be part of the comparison:
    # checking only the other results would accept others that agree with each other but not with this
    # result, and would reject a call without any other results.
    if len({self.id, *(other_result.id for other_result in other_results)}) != 1:
        raise ValueError("Can not merge objects with differing ids.")

    return TabularTimeSeriesUtils.merge(self, *other_results)
- Args: - *other_compressor_results: - - Returns: - - """ - - # Verify that the results are for the same consumer - if len({self.id, *[other_compressor_result.id for other_compressor_result in other_compressor_results]}) != 1: - raise ValueError("Can not merge results with differing ids.") - - # Verify units and rate types - for key, value in self.__dict__.items(): - for other_compressor_result in other_compressor_results: - other_value = other_compressor_result.__getattribute__(key) - if isinstance(value, TimeSeriesRate): - if not isinstance(other_value, TimeSeriesRate): - raise ValueError( - f"Invalid type of {key} for compressor result with id {other_compressor_result.id}" - ) - if value.rate_type != other_value.rate_type: - raise ValueError("Rate types does not match") - - if isinstance(value, TimeSeries): - if not isinstance(other_value, TimeSeries): - raise ValueError( - f"Invalid type of {key} for compressor result with id {other_compressor_result.id}" - ) - - if value.unit != other_value.unit: - raise ValueError("Units does not match") - - merged_columns = self._merge_columns(*other_compressor_results) - timesteps = merged_columns.get("timesteps") - - return self.__class__( - id=self.id, - timesteps=timesteps, - energy_usage=TimeSeriesRate( - timesteps=timesteps, - values=merged_columns.get("energy_usage"), - unit=self.energy_usage.unit, - regularity=merged_columns.get("energy_usage_regularity"), - rate_type=self.energy_usage.rate_type, - ), - power=TimeSeriesRate( - timesteps=timesteps, - values=merged_columns.get("power"), - unit=self.power.unit, - regularity=merged_columns.get("power_regularity"), - rate_type=self.power.rate_type, - ), - is_valid=TimeSeriesBoolean( - timesteps=timesteps, - values=merged_columns.get("is_valid"), - unit=self.is_valid.unit, - ), - recirculation_loss=TimeSeriesRate( - timesteps=timesteps, - values=merged_columns.get("recirculation_loss"), - unit=self.recirculation_loss.unit, - 
regularity=merged_columns.get("recirculation_loss_regularity"), - rate_type=self.recirculation_loss.rate_type, - ), - rate_exceeds_maximum=TimeSeriesBoolean( - timesteps=timesteps, - values=merged_columns.get("rate_exceeds_maximum"), - unit=self.rate_exceeds_maximum.unit, - ), - outlet_pressure_before_choking=TimeSeriesFloat( - timesteps=timesteps, - values=merged_columns.get("outlet_pressure_before_choking"), - unit=self.outlet_pressure_before_choking.unit, - ), - ) - class PumpResult(GenericComponentResult): inlet_liquid_rate_m3_per_day: TimeSeriesRate @@ -210,86 +95,6 @@ def get_subset(self, indices: List[int]) -> Self: operational_head=self.operational_head[indices], ) - def merge(self, *other_pump_results: PumpResult) -> Self: - """ - Merge all attributes of a sequence type, while also making sure the other attributes can be merged (i.e. id should be equal). - Args: - *other_pump_results: - - Returns: - - """ - - # Verify that the results are for the same consumer - if len({self.id, *[other_pump_result.id for other_pump_result in other_pump_results]}) != 1: - raise ValueError("Can not merge results with differing ids.") - - # Verify units and rate types - for key, value in self.__dict__.items(): - for other_pump_result in other_pump_results: - other_value = other_pump_result.__getattribute__(key) - if isinstance(value, TimeSeriesRate): - if not isinstance(other_value, TimeSeriesRate): - raise ValueError(f"Invalid type of {key} for pump result with id {other_pump_result.id}") - if value.rate_type != other_value.rate_type: - raise ValueError("Rate types does not match") - - if isinstance(value, TimeSeries): - if not isinstance(other_value, TimeSeries): - raise ValueError(f"Invalid type of {key} for pump result with id {other_pump_result.id}") - - if value.unit != other_value.unit: - raise ValueError("Units does not match") - - merged_columns = self._merge_columns(*other_pump_results) - timesteps = merged_columns.get("timesteps") - - return self.__class__( - 
id=self.id, - timesteps=timesteps, - energy_usage=TimeSeriesRate( - timesteps=timesteps, - values=merged_columns.get("energy_usage"), - unit=self.energy_usage.unit, - regularity=merged_columns.get("energy_usage_regularity"), - rate_type=self.energy_usage.rate_type, - ), - power=TimeSeriesRate( - timesteps=timesteps, - values=merged_columns.get("power"), - unit=self.power.unit, - regularity=merged_columns.get("power_regularity"), - rate_type=self.energy_usage.rate_type, - ), - is_valid=TimeSeriesBoolean( - timesteps=timesteps, - values=merged_columns.get("is_valid"), - unit=self.is_valid.unit, - ), - inlet_liquid_rate_m3_per_day=TimeSeriesRate( - timesteps=timesteps, - values=merged_columns.get("inlet_liquid_rate_m3_per_day"), - unit=self.inlet_liquid_rate_m3_per_day.unit, - regularity=merged_columns.get("inlet_liquid_rate_m3_per_day_regularity"), - rate_type=self.inlet_liquid_rate_m3_per_day.rate_type, - ), - inlet_pressure_bar=TimeSeriesFloat( - timesteps=timesteps, - values=merged_columns.get("inlet_pressure_bar"), - unit=self.inlet_pressure_bar.unit, - ), - outlet_pressure_bar=TimeSeriesFloat( - timesteps=timesteps, - values=merged_columns.get("outlet_pressure_bar"), - unit=self.outlet_pressure_bar.unit, - ), - operational_head=TimeSeriesFloat( - timesteps=timesteps, - values=merged_columns.get("operational_head"), - unit=self.operational_head.unit, - ), - ) - class ConsumerModelResultBase(ABC, CommonResultBase): """The Consumer base result component.""" diff --git a/src/tests/libecalc/common/test_tabular_time_series.py b/src/tests/libecalc/common/test_tabular_time_series.py new file mode 100644 index 0000000000..14bab0ae3c --- /dev/null +++ b/src/tests/libecalc/common/test_tabular_time_series.py @@ -0,0 +1,106 @@ +from datetime import datetime +from typing import List + +import pytest +from libecalc.common.tabular_time_series import TabularTimeSeriesUtils +from libecalc.common.units import Unit +from libecalc.common.utils.rates import TimeSeriesFloat, 
# Model mixing plain attributes with TimeSeries attributes, used to check which attributes
# TabularTimeSeriesUtils.merge combines and which it copies from the first object.
class MergeableObject(BaseModel):
    string_test: str
    int_test: int
    float_test: float
    list_of_float_test: List[float]
    time_series_float: TimeSeriesFloat
    time_series_rate: TimeSeriesRate


class TestMerge:
    """Tests for TabularTimeSeriesUtils.merge."""

    def test_valid_merge(self):
        """TimeSeries attributes are interleaved by timestep; all other attributes come from the first object."""
        first_timesteps = [datetime(2020, 1, 1), datetime(2022, 1, 1)]
        first = MergeableObject(
            string_test="1",
            int_test=15,
            float_test=1.0,
            list_of_float_test=[11, 12, 13, 14, 15],
            time_series_rate=TimeSeriesRate(
                timesteps=first_timesteps,
                values=[11, 12],
                unit=Unit.NORWEGIAN_KRONER,
                regularity=[11, 12],
                rate_type=RateType.CALENDAR_DAY,
            ),
            time_series_float=TimeSeriesFloat(
                timesteps=first_timesteps,
                values=[11, 12],
                unit=Unit.NORWEGIAN_KRONER,
            ),
        )

        second_timesteps = [datetime(2021, 1, 1), datetime(2023, 1, 1)]
        second = MergeableObject(
            string_test="2",
            int_test=25,
            float_test=2.0,
            list_of_float_test=[21, 22, 23, 24, 25],
            time_series_float=TimeSeriesFloat(
                timesteps=second_timesteps,
                values=[21, 22],
                unit=Unit.NORWEGIAN_KRONER,
            ),
            time_series_rate=TimeSeriesRate(
                timesteps=second_timesteps,
                values=[21, 22],
                unit=Unit.NORWEGIAN_KRONER,
                regularity=[21, 22],
                rate_type=RateType.CALENDAR_DAY,
            ),
        )

        merged = TabularTimeSeriesUtils.merge(first, second)

        expected_timesteps = [datetime(2020, 1, 1), datetime(2021, 1, 1), datetime(2022, 1, 1), datetime(2023, 1, 1)]

        assert merged == MergeableObject(
            string_test="1",
            int_test=15,
            float_test=1.0,
            list_of_float_test=[11, 12, 13, 14, 15],
            time_series_float=TimeSeriesFloat(
                timesteps=expected_timesteps,
                values=[11, 21, 12, 22],
                unit=Unit.NORWEGIAN_KRONER,
            ),
            time_series_rate=TimeSeriesRate(
                timesteps=expected_timesteps,
                values=[11, 21, 12, 22],
                unit=Unit.NORWEGIAN_KRONER,
                regularity=[11, 21, 12, 22],
                rate_type=RateType.CALENDAR_DAY,
            ),
        )

    def test_invalid_types(self):
        """Objects of different classes are rejected, even when their attribute names match."""

        class First(BaseModel):
            something: TimeSeriesFloat

        first = First(
            something=TimeSeriesFloat(
                timesteps=[datetime(2022, 1, 1)],
                values=[1],
                unit=Unit.NONE,
            )
        )

        class Other(BaseModel):
            something: List[int]

        other = Other(something=[1, 2])

        with pytest.raises(ValueError) as exc_info:
            TabularTimeSeriesUtils.merge(first, other)

        assert str(exc_info.value) == "Can not merge objects of differing types."


class TestTimeSeriesMerge:
    """Tests for TimeSeries.merge and the TimeSeriesRate.merge override."""

    def test_merge_time_series_float_success(self):
        """
        Use TimeSeriesFloat to test the 'generic' merge (parent class merge)
        """

        first = TimeSeriesFloat(
            timesteps=[datetime(2021, 1, 1), datetime(2023, 1, 1)],
            values=[11, 12],
            unit=Unit.NORWEGIAN_KRONER,
        )

        second = TimeSeriesFloat(
            timesteps=[datetime(2020, 1, 1), datetime(2022, 1, 1), datetime(2024, 1, 1), datetime(2030, 1, 1)],
            values=[21, 22, 23, 24],
            unit=Unit.NORWEGIAN_KRONER,
        )

        assert first.merge(second) == TimeSeriesFloat(
            timesteps=[
                datetime(2020, 1, 1),
                datetime(2021, 1, 1),
                datetime(2022, 1, 1),
                datetime(2023, 1, 1),
                datetime(2024, 1, 1),
                datetime(2030, 1, 1),
            ],
            values=[21, 11, 22, 12, 23, 24],
            unit=Unit.NORWEGIAN_KRONER,
        )

    def test_merge_time_series_float_different_unit(self):
        first = TimeSeriesFloat(
            timesteps=[datetime(2021, 1, 1)],
            values=[11],
            unit=Unit.NORWEGIAN_KRONER,
        )

        second = TimeSeriesFloat(
            timesteps=[datetime(2020, 1, 1)],
            values=[21],
            unit=Unit.NORWEGIAN_KRONER_PER_DAY,
        )

        with pytest.raises(ValueError) as exc_info:
            first.merge(second)

        assert str(exc_info.value) == "Mismatching units: 'NOK' != 'NOK/d'"

    def test_merge_time_series_float_overlapping_timesteps(self):
        first = TimeSeriesFloat(
            timesteps=[datetime(2021, 1, 1)],
            values=[11],
            unit=Unit.NORWEGIAN_KRONER,
        )

        second = TimeSeriesFloat(
            timesteps=[datetime(2020, 1, 1), datetime(2021, 1, 1)],
            values=[21, 22],
            unit=Unit.NORWEGIAN_KRONER,
        )

        with pytest.raises(ValueError) as exc_info:
            first.merge(second)

        assert str(exc_info.value) == "Can not merge two TimeSeries with common timesteps"

    def test_merge_time_series_different_types(self):
        first = TimeSeriesFloat(
            timesteps=[datetime(2021, 1, 1)],
            values=[11],
            unit=Unit.NORWEGIAN_KRONER,
        )

        second = TimeSeriesBoolean(
            timesteps=[datetime(2020, 1, 1)],
            values=[True],
            unit=Unit.NORWEGIAN_KRONER,
        )

        with pytest.raises(ValueError) as exc_info:
            first.merge(second)

        # The message embeds the repr of both classes, as produced by the f-string in TimeSeries.merge.
        assert str(exc_info.value) == (
            "Can not merge <class 'libecalc.common.utils.rates.TimeSeriesFloat'> with "
            "<class 'libecalc.common.utils.rates.TimeSeriesBoolean'>"
        )

    def test_merge_time_series_rate_success(self):
        """
        Use TimeSeriesRate to test the rate-specific merge, which also merges regularity
        """

        first = TimeSeriesRate(
            timesteps=[datetime(2021, 1, 1), datetime(2023, 1, 1)],
            values=[11, 12],
            unit=Unit.NORWEGIAN_KRONER,
            regularity=[11, 12],
            rate_type=RateType.STREAM_DAY,
        )

        second = TimeSeriesRate(
            timesteps=[datetime(2020, 1, 1), datetime(2022, 1, 1), datetime(2024, 1, 1), datetime(2030, 1, 1)],
            values=[21, 22, 23, 24],
            unit=Unit.NORWEGIAN_KRONER,
            regularity=[21, 22, 23, 24],
            rate_type=RateType.STREAM_DAY,
        )

        assert first.merge(second) == TimeSeriesRate(
            timesteps=[
                datetime(2020, 1, 1),
                datetime(2021, 1, 1),
                datetime(2022, 1, 1),
                datetime(2023, 1, 1),
                datetime(2024, 1, 1),
                datetime(2030, 1, 1),
            ],
            values=[21, 11, 22, 12, 23, 24],
            unit=Unit.NORWEGIAN_KRONER,
            regularity=[21, 11, 22, 12, 23, 24],
            rate_type=RateType.STREAM_DAY,
        )

    def test_merge_time_series_rate_different_unit(self):
        first = TimeSeriesRate(
            timesteps=[datetime(2021, 1, 1)],
            values=[11],
            unit=Unit.NORWEGIAN_KRONER,
        )

        second = TimeSeriesRate(
            timesteps=[datetime(2020, 1, 1)],
            values=[21],
            unit=Unit.NORWEGIAN_KRONER_PER_DAY,
        )

        with pytest.raises(ValueError) as exc_info:
            first.merge(second)

        assert str(exc_info.value) == "Mismatching units: 'NOK' != 'NOK/d'"

    def test_merge_time_series_rate_overlapping_timesteps(self):
        first = TimeSeriesRate(
            timesteps=[datetime(2021, 1, 1)],
            values=[11],
            unit=Unit.NORWEGIAN_KRONER,
        )

        second = TimeSeriesRate(
            timesteps=[datetime(2020, 1, 1), datetime(2021, 1, 1)],
            values=[21, 22],
            unit=Unit.NORWEGIAN_KRONER,
        )

        with pytest.raises(ValueError) as exc_info:
            first.merge(second)

        assert str(exc_info.value) == "Can not merge two TimeSeries with common timesteps"

    def test_merge_time_series_rate_different_types(self):
        first = TimeSeriesRate(
            timesteps=[datetime(2021, 1, 1)],
            values=[11],
            unit=Unit.NORWEGIAN_KRONER,
        )

        second = TimeSeriesBoolean(
            timesteps=[datetime(2020, 1, 1)],
            values=[True],
            unit=Unit.NORWEGIAN_KRONER,
        )

        with pytest.raises(ValueError) as exc_info:
            first.merge(second)

        # The message embeds the repr of both classes, as produced by the f-string in TimeSeriesRate.merge.
        assert str(exc_info.value) == (
            "Can not merge <class 'libecalc.common.utils.rates.TimeSeriesRate'> with "
            "<class 'libecalc.common.utils.rates.TimeSeriesBoolean'>"
        )

    def test_merge_time_series_rate_different_rate_types(self):
        first = TimeSeriesRate(
            timesteps=[datetime(2021, 1, 1)],
            values=[11],
            unit=Unit.NORWEGIAN_KRONER_PER_DAY,
            rate_type=RateType.STREAM_DAY,
        )

        second = TimeSeriesRate(
            timesteps=[datetime(2020, 1, 1)],
            values=[21],
            unit=Unit.NORWEGIAN_KRONER_PER_DAY,
            rate_type=RateType.CALENDAR_DAY,
        )

        with pytest.raises(ValueError) as exc_info:
            first.merge(second)

        assert str(exc_info.value) == (
            "Mismatching rate type. Currently you can not merge stream/calendar day rates "
            "with calendar/stream day rates."
        )