Skip to content

Commit

Permalink
fix: ensure that start date in global time vector is consistent with the requested output frequency (#269)
Browse files Browse the repository at this point in the history

chore: possibility to include start and end date in resampling

chore: extend tests of time series resampling
  • Loading branch information
olelod authored Nov 16, 2023
1 parent 2072e9e commit e8ef9b9
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 77 deletions.
8 changes: 3 additions & 5 deletions src/ecalc_cli/io/output.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,9 @@ def export_tsv(
Returns:
"""
resampled_timevector = resample_time_steps(
results.timesteps,
frequency,
remove_last=True, # last step is always added as a STOP, and does infer the end of the time vector
)
resampled_timevector = resample_time_steps(results.timesteps, frequency)[
:-1
] # last step is always added as a STOP, and does infer the end of the time vector

prognosis_filter = config.filter(frequency=frequency)
result = prognosis_filter.filter(results, resampled_timevector)
Expand Down
62 changes: 46 additions & 16 deletions src/libecalc/common/time_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,30 +166,60 @@ def formatstring(self) -> str:
def resample_time_steps(
time_steps: List[datetime],
frequency: Frequency,
remove_last: bool = False,
include_start_date: bool = True,
include_end_date: bool = True,
) -> List[datetime]:
"""Makes a time vector, based on the first and last date in time_vector and the frequency.
:param time_steps: The original time vector
:type time_steps: List[datetime]
:param frequency: The reporting frequency
:type frequency: Frequency
:param remove_last: Decides whether the final date should be returned
:type remove_last: bool
:return: Time vector with dates according to frequency, start and end date
"rtype: List[datetime]
"""Makes a time vector, based on the first and last date in time_steps and the frequency.
Args:
time_steps: The original time vector
frequency: The reporting frequency
include_start_date: Whether to include the start date if it is not part of the requested reporting frequency
include_end_date: Whether to include the end date if it is not part of the requested reporting frequency
Returns: Time vector with dates according to given input
"""
if frequency is not Frequency.NONE:
time_step_vector = create_time_steps(start=time_steps[0], end=time_steps[-1], frequency=frequency)
time_step_vector = create_time_steps(
start=time_steps[0],
end=time_steps[-1],
frequency=frequency,
include_start_date=include_start_date,
include_end_date=include_end_date,
)
else:
time_step_vector = time_steps

return time_step_vector[:-1] if remove_last else time_step_vector
return time_step_vector


def create_time_steps(
frequency: Frequency, start: datetime, end: datetime, include_start_date: bool, include_end_date: bool
) -> List[datetime]:
"""
Args:
frequency: The requested frequency
start: The start date
end: The end date
include_start_date: Whether to include the start date if it is not part of the requested frequency
include_end_date: Whether to include the end date if it is not part of the requested frequency
Returns:
A list of dates (and possibly including the start/end dates) between the given start and end dates following
the requested frequency
"""
date_range = pd.date_range(start=start, end=end, freq=frequency.value)

time_steps = [clear_time(time_step) for time_step in date_range]
if include_start_date:
time_steps = [clear_time(start)] + time_steps
if include_end_date:
time_steps = [clear_time(end)] + time_steps

def create_time_steps(frequency: Frequency, start: datetime, end: datetime) -> List[datetime]:
time_steps = pd.date_range(start=start, end=end, freq=frequency.value)
return sorted({clear_time(start), *[clear_time(time_step) for time_step in time_steps], clear_time(end)})
return sorted(set(time_steps))


def clear_time(d: datetime) -> datetime:
Expand Down
92 changes: 52 additions & 40 deletions src/libecalc/common/utils/rates.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@
from libecalc.common.list.list_utils import elementwise_sum
from libecalc.common.logger import logger
from libecalc.common.string.string_utils import to_camel_case
from libecalc.common.time_utils import Frequency, Period, calculate_delta_days
from libecalc.common.time_utils import (
Frequency,
Period,
calculate_delta_days,
resample_time_steps,
)
from libecalc.common.units import Unit
from numpy.typing import NDArray
from pydantic import Extra, validator
Expand Down Expand Up @@ -191,7 +196,7 @@ def __lt__(self, other) -> bool:
return all(self_value < other_value for self_value, other_value in zip(self.values, other.values))

@abstractmethod
def resample(self, freq: Frequency) -> Self:
def resample(self, freq: Frequency, include_start_date: bool, include_end_date: bool) -> Self:
...

def extend(self, other: TimeSeries) -> Self:
Expand Down Expand Up @@ -365,7 +370,7 @@ def __eq__(self, other: object) -> bool:


class TimeSeriesString(TimeSeries[str]):
def resample(self, freq: Frequency) -> Self:
def resample(self, freq: Frequency, include_start_date: bool, include_end_date: bool) -> Self:
"""
Resample using forward-fill. This means that a value is assumed to be the same until the next observation,
e.g. covering the whole period interval.
Expand All @@ -392,7 +397,7 @@ def resample(self, freq: Frequency) -> Self:


class TimeSeriesInt(TimeSeries[int]):
def resample(self, freq: Frequency) -> Self:
def resample(self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True) -> Self:
"""
Resample using forward-fill. This means that a value is assumed to be the same until the next observation,
e.g. covering the whole period interval.
Expand All @@ -408,18 +413,21 @@ def resample(self, freq: Frequency) -> Self:

ds = pd.Series(index=self.timesteps, data=self.values)

new_timesteps = resample_time_steps(
self.timesteps, frequency=freq, include_start_date=include_start_date, include_end_date=include_end_date
)
# New resampled pd.Series
ds_resampled = ds.resample(freq).ffill()
ds_resampled = ds.reindex(new_timesteps).ffill()

return TimeSeriesInt(
timesteps=ds_resampled.index.to_pydatetime().tolist(),
timesteps=new_timesteps,
values=list(ds_resampled.values.tolist()),
unit=self.unit,
)


class TimeSeriesBoolean(TimeSeries[bool]):
def resample(self, freq: Frequency) -> Self:
def resample(self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True) -> Self:
"""
Resample using forward-fill. This means that a value is assumed to be the same until the next observation,
e.g. covering the whole period interval.
Expand All @@ -435,16 +443,13 @@ def resample(self, freq: Frequency) -> Self:

ds = pd.Series(index=self.timesteps, data=self.values)

target_index = ds.resample(freq.value).asfreq().index

# Union of old and new index. Forward-fill missing values.
ds_tmp = ds.reindex(ds.index.union(target_index)).ffill()

# New resampled pd.Series
ds_resampled = ds_tmp.groupby(pd.Grouper(freq=freq.value)).all()
new_timeseries = resample_time_steps(
self.timesteps, frequency=freq, include_start_date=include_start_date, include_end_date=include_end_date
)
ds_resampled = ds.reindex(new_timeseries).ffill()

return TimeSeriesBoolean(
timesteps=ds_resampled.index.to_pydatetime().tolist(),
timesteps=new_timeseries,
values=[bool(x) for x in ds_resampled.values.tolist()],
unit=self.unit,
)
Expand All @@ -463,7 +468,7 @@ def __mul__(self, other: object) -> Self:


class TimeSeriesFloat(TimeSeries[float]):
def resample(self, freq: Frequency) -> Self:
def resample(self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True) -> Self:
"""
Resample using forward-fill. This means that a value is assumed to be the same until the next observation,
e.g. covering the whole period interval.
Expand All @@ -477,18 +482,15 @@ def resample(self, freq: Frequency) -> Self:
if freq is Frequency.NONE:
return self.copy()

pandas_data_series = pd.Series(index=self.timesteps, data=self.values)

target_index = pandas_data_series.resample(freq.value).asfreq().index

# Union of old and new index. Forward-fill missing values.
ds_tmp = pandas_data_series.reindex(pandas_data_series.index.union(target_index)).ffill()
ds = pd.Series(index=self.timesteps, data=self.values)

# New resampled pd.Series
ds_resampled = ds_tmp.groupby(pd.Grouper(freq=freq.value)).first()
new_timeseries = resample_time_steps(
self.timesteps, frequency=freq, include_start_date=include_start_date, include_end_date=include_end_date
)
ds_resampled = ds.reindex(new_timeseries).ffill()

return TimeSeriesFloat(
timesteps=ds_resampled.index.to_pydatetime().tolist(),
timesteps=new_timeseries,
values=[float(x) for x in ds_resampled.values.tolist()],
unit=self.unit,
)
Expand All @@ -502,7 +504,9 @@ def reindex(self, new_time_vector: Iterable[datetime]) -> TimeSeriesFloat:


class TimeSeriesVolumesCumulative(TimeSeries[float]):
def resample(self, freq: Frequency) -> TimeSeriesVolumesCumulative:
def resample(
self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True
) -> TimeSeriesVolumesCumulative:
"""
Resample cumulative production volumes according to given frequency. Since the production rates between
dates are assumed to be constant, the cumulative production volumes will increase linearly between dates.
Expand All @@ -522,20 +526,22 @@ def resample(self, freq: Frequency) -> TimeSeriesVolumesCumulative:
return self.copy()

ds = pd.Series(index=self.timesteps, data=self.values)
new_index = ds.resample(freq.value).asfreq().index
if ds.index[-1] not in new_index:
new_timeseries = resample_time_steps(
self.timesteps, frequency=freq, include_start_date=include_start_date, include_end_date=include_end_date
)
if ds.index[-1] not in new_timeseries:
logger.warning(
f"The final date in the rate input ({ds.index[-1].strftime('%m/%d/%Y')}) does not "
f"correspond to the end of a period with the requested output frequency. There is a "
f"possibility that the resampling will drop volumes."
)
ds_interpolated = ds.reindex(ds.index.union(new_index)).interpolate("slinear")
ds_interpolated = ds.reindex(ds.index.union(new_timeseries)).interpolate("slinear")

# New resampled pd.Series
ds_resampled = ds_interpolated.reindex(new_index)
ds_resampled = ds_interpolated.reindex(new_timeseries)

return TimeSeriesVolumesCumulative(
timesteps=ds_resampled.index.to_pydatetime().tolist(),
timesteps=new_timeseries,
# Are we sure this is always a DatetimeIndex? type: ignore
values=ds_resampled.values.tolist(),
unit=self.unit,
Expand Down Expand Up @@ -593,7 +599,7 @@ def check_length_timestep_values(cls, v: List[Any], values: Dict[str, Any]):
)
return v

def resample(self, freq: Frequency):
def resample(self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True):
msg = (
f"{self.__repr_name__()} does not have an resample method."
f" You should not land here. Please contact the eCalc Support."
Expand Down Expand Up @@ -681,7 +687,9 @@ def to_rate(self, regularity: Optional[List[float]] = None) -> TimeSeriesRate:


class TimeSeriesIntensity(TimeSeries[float]):
def resample(self, freq: Frequency) -> TimeSeriesIntensity:
def resample(
self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True
) -> TimeSeriesIntensity:
"""
Resample emission intensity according to given frequency.
Slinear is used in order to only interpolate, not extrapolate.
Expand All @@ -696,14 +704,16 @@ def resample(self, freq: Frequency) -> TimeSeriesIntensity:
return self.copy()

ds = pd.Series(index=self.timesteps, data=self.values)
new_index = ds.resample(freq.value).asfreq().index
ds_interpolated = ds.reindex(ds.index.union(new_index)).interpolate("slinear")
new_timeseries = resample_time_steps(
self.timesteps, frequency=freq, include_start_date=include_start_date, include_end_date=include_end_date
)
ds_interpolated = ds.reindex(ds.index.union(new_timeseries)).interpolate("slinear")

# New resampled pd.Series
ds_resampled = ds_interpolated.reindex(new_index)
ds_resampled = ds_interpolated.reindex(new_timeseries)

return TimeSeriesIntensity(
timesteps=ds_resampled.index.to_pydatetime().tolist(), # type: ignore
timesteps=new_timeseries,
values=ds_resampled.to_numpy().tolist(),
unit=self.unit,
)
Expand Down Expand Up @@ -966,7 +976,9 @@ def to_volumes(self) -> TimeSeriesVolumes:

return TimeSeriesVolumes(timesteps=self.timesteps, values=volumes, unit=self.unit.rate_to_volume())

def resample(self, freq: Frequency) -> TimeSeriesRate:
def resample(
self, freq: Frequency, include_start_date: bool = True, include_end_date: bool = True
) -> TimeSeriesRate:
"""
Resample to average rate. If a period at the given frequency spans multiple input periods, the rate will be a
weighted average of the rates in those periods. The regularity is also recalculated to reflect the new
Expand Down Expand Up @@ -998,7 +1010,7 @@ def resample(self, freq: Frequency) -> TimeSeriesRate:
timesteps=self.timesteps,
unit=self.to_volumes().unit,
)
.resample(freq=freq)
.resample(freq=freq, include_start_date=include_start_date, include_end_date=include_end_date)
.to_volumes()
)
# make resampled stream day volumes via cumulative "stream-day-volumes"
Expand All @@ -1011,7 +1023,7 @@ def resample(self, freq: Frequency) -> TimeSeriesRate:
timesteps=self.timesteps,
unit=self.to_volumes().unit,
)
.resample(freq=freq)
.resample(freq=freq, include_start_date=include_start_date, include_end_date=include_end_date)
.to_volumes()
)

Expand Down
6 changes: 2 additions & 4 deletions src/libecalc/dto/result/tabular_time_series.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
from typing_extensions import Self

from libecalc.common.time_utils import Frequency
from libecalc.common.time_utils import Frequency, resample_time_steps
from libecalc.common.units import Unit
from libecalc.common.utils.rates import (
RateType,
Expand Down Expand Up @@ -102,7 +102,5 @@ def resample(self, freq: Frequency) -> Self:
# NOTE: turbine_result is not resampled. Should add support?
pass

resampled.timesteps = (
pd.date_range(start=self.timesteps[0], end=self.timesteps[-1], freq=freq.value).to_pydatetime().tolist()
)
resampled.timesteps = resample_time_steps(self.timesteps, frequency=freq)
return resampled
Loading

0 comments on commit e8ef9b9

Please sign in to comment.