Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

align code with min_coverage and modular newyear clipping #172

Merged
16 changes: 9 additions & 7 deletions examples/KWK_process.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@


nap_correction = False
min_coverage = 0.9 # for tidalindicators and slotgemiddelde #TODO: can also be used for havengetallen and gemgetij
min_coverage = 0.9
drop_duplicates = True

compute_indicators = True
Expand Down Expand Up @@ -122,8 +122,7 @@


#### SLOTGEMIDDELDEN
# TODO: nodal cycle is not in same phase for all stations, this is not physically correct.
# TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND)
# TODO: more data is needed for proper working of fitting for some stations (2011: BAALHK, BRESKVHVN, GATVBSLE, SCHAARVDND) >> still after linear?
if compute_slotgem:
print(f'slotgemiddelden for {current_station}')

Expand Down Expand Up @@ -176,7 +175,7 @@
### HAVENGETALLEN
if compute_havengetallen:
print(f'havengetallen for {current_station}')
df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_todate, return_df_ext=True)
df_havengetallen, df_HWLW = kw.calc_havengetallen(df_ext=df_ext_todate, return_df_ext=True, min_coverage=min_coverage)

# plot hwlw per timeclass including median
fig, axs = kw.plot_HWLW_pertimeclass(df_ext=df_HWLW, df_havengetallen=df_havengetallen)
Expand All @@ -201,13 +200,16 @@
# derive getijkrommes: raw, scaled to havengetallen, scaled to havengetallen and 12h25min period
gemgetij_raw = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=None,
freq=pred_freq, nb=0, nf=0,
scale_extremes=False, scale_period=False)
scale_extremes=False, scale_period=False,
min_coverage=min_coverage)
gemgetij_corr = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=df_ext_todate,
freq=pred_freq, nb=1, nf=1,
scale_extremes=True, scale_period=False)
scale_extremes=True, scale_period=False,
min_coverage=min_coverage)
gemgetij_corr_boi = kw.calc_gemiddeldgetij(df_meas=df_meas_todate, df_ext=df_ext_todate,
freq=pred_freq, nb=0, nf=4,
scale_extremes=True, scale_period=True)
scale_extremes=True, scale_period=True,
min_coverage=min_coverage)

# TODO: the shape of the validation lines is different, so compare krommes to gele boekje instead
# p:\archivedprojects\11205258-005-kpp2020_rmm-g5\C_Work\00_KenmerkendeWaarden\07_Figuren\figures_ppSCL_2\final20201211
Expand Down
12 changes: 4 additions & 8 deletions kenmerkendewaarden/gemiddeldgetij.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,9 +52,11 @@ def calc_gemiddeldgetij(
Timeseries of waterlevel extremes (1/2 only). The last 10 years of this
timeseries are used to compute the getijkrommes. The default is None.
min_coverage : float, optional
The minimal required coverage of the df_ext timeseries. Passed on to `calc_havengetallen()`. The default is None.
The minimal required coverage of the df_ext timeseries. Passed on to
`calc_havengetallen()`. The default is None.
freq : str, optional
Frequency of the prediction, a value of 60 seconds or lower is advisable for decent results. The default is "60sec".
Frequency of the prediction, a value of 60 seconds or lower is advisable for
decent results. The default is "60sec".
nb : int, optional
Amount of periods to repeat backward. The default is 0.
nf : int, optional
Expand Down Expand Up @@ -351,12 +353,6 @@ def get_gemgetij_components(data_pd_meas):
# components should not be reduced, since higher harmonics are necessary
comp_frommeasurements_avg, _ = calc_getijcomponenten(df_meas=data_pd_meas)

# #check if all years are available
# comp_years = comp_frommeasurements_allyears['A'].columns
# expected_years = tstop_dt.year-tstart_dt.year
# if len(comp_years) < expected_years:
# raise Exception('ERROR: analysis result contains not all years')

# check if nans in analysis
if comp_frommeasurements_avg.isnull()["A"].any():
raise ValueError("analysis result contains nan values")
Expand Down
6 changes: 2 additions & 4 deletions kenmerkendewaarden/slotgemiddelden.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
calc_wltidalindicators,
calc_HWLWtidalindicators,
)
from kenmerkendewaarden.utils import clip_timeseries_last_newyearsday
import logging

__all__ = [
Expand Down Expand Up @@ -57,10 +58,7 @@ def calc_slotgemiddelden(
slotgemiddelden_dict = {}

# clip last value of the timeseries if this is exactly newyearsday
if df_meas.index[-1] == pd.Timestamp(
df_meas.index[-1].year, 1, 1, tz=df_meas.index.tz
):
df_meas = df_meas.iloc[:-1]
df_meas = clip_timeseries_last_newyearsday(df_meas)

# calculate yearly means
dict_wltidalindicators = calc_wltidalindicators(df_meas, min_coverage=min_coverage)
Expand Down
10 changes: 8 additions & 2 deletions kenmerkendewaarden/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,16 @@ def raise_extremes_with_aggers(df_ext):
)


def crop_timeseries_last_nyears(df, nyears):
# remove last timestep if equal to "yyyy-01-01 00:00:00"
def clip_timeseries_last_newyearsday(df):
# clip last value of the timeseries if this is exactly newyearsday
# so remove last timestep if equal to "yyyy-01-01 00:00:00"
if '-01-01 00:00:00' in str(df.index[-1]):
df = df.iloc[:-1]
return df


def crop_timeseries_last_nyears(df, nyears):
df = clip_timeseries_last_newyearsday(df)

# last_year, for instance 2020
last_year = df.index[-1].year
Expand Down
9 changes: 9 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"""
import pytest
from kenmerkendewaarden.utils import (raise_extremes_with_aggers,
clip_timeseries_last_newyearsday,
crop_timeseries_last_nyears)
import pandas as pd
import numpy as np
Expand All @@ -27,6 +28,14 @@ def test_raise_extremes_with_aggers_pass_12df(df_ext_12_2010):
raise_extremes_with_aggers(df_ext_12_2010)


@pytest.mark.unittest
def test_clip_timeseries_last_newyearsday(df_meas, df_meas_2010):
    """Check how many timesteps are removed by clip_timeseries_last_newyearsday."""
    # NOTE(review): presumably df_meas ends exactly on newyearsday and
    # df_meas_2010 does not -- confirm against the fixture definitions.
    clipped_full = clip_timeseries_last_newyearsday(df_meas)
    clipped_2010 = clip_timeseries_last_newyearsday(df_meas_2010)

    n_removed_full = len(df_meas) - len(clipped_full)
    n_removed_2010 = len(df_meas_2010) - len(clipped_2010)

    # exactly one timestep is dropped from df_meas, none from df_meas_2010
    assert n_removed_full == 1
    assert n_removed_2010 == 0


@pytest.mark.unittest
def test_crop_timeseries_last_nyears(df_meas):
assert df_meas.index[0] == pd.Timestamp("1987-01-01 00:00:00+01:00 ")
Expand Down
Loading