Skip to content

Commit

Permalink
Fix timeslice inclusive outside (#1062)
Browse files Browse the repository at this point in the history
* including outside bounds of min/max times in xarray dataset subsetting

* added tests

* updated whatsnew
  • Loading branch information
veenstrajelmer authored Jan 16, 2025
1 parent b88f0b0 commit af2ea58
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 8 deletions.
29 changes: 22 additions & 7 deletions dfm_tools/interpolate_grid2bnd.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,13 +381,13 @@ def open_prepare_dataset(dir_pattern, quantity, tstart, tstop, conversion_dict=N
# retrieve var(s) (after potential longitude conversion)
data_xr_vars = data_xr[[quantity]]

# slice time
check_time_extent(data_xr, tstart, tstop)
data_xr_vars = data_xr_vars.sel(time=slice(tstart,tstop))
# check time extent again to avoid issues with eg midday data being
# sliced to midnight times: https://github.com/Deltares/dfm_tools/issues/707
check_time_extent(data_xr_vars, tstart, tstop)

# slice dataset on time, keeping the nearest timestamps at or outside the requested time range
data_xr_vars = _ds_sel_time_outside(
ds=data_xr_vars,
tstart=tstart,
tstop=tstop,
)
#optional refdate changing
if refdate_str is not None:
if 'long_name' in data_xr_vars.time.attrs: #for CMEMS it is 'hours since 1950-01-01', which would be wrong now #TODO: consider also removing attrs for depth/varname, since we would like to have salinitybnd/waterlevel instead of Salinity/sea_surface_height in xr plots?
Expand All @@ -397,6 +397,21 @@ def open_prepare_dataset(dir_pattern, quantity, tstart, tstop, conversion_dict=N
return data_xr_vars


def _ds_sel_time_outside(ds: xr.Dataset, tstart, tstop) -> xr.Dataset:
    """
    Select a time range from the dataset that fully encloses tstart/tstop.

    The extent of the dataset is first validated with `check_time_extent()`.
    The lower bound of the selection is the timestamp found with a "pad"
    lookup of tstart, the upper bound is the timestamp found with a
    "backfill" lookup of tstop. When tstart/tstop match a timestamp exactly,
    that timestamp is the bound.
    Inspired by copernicusmarine.download_functions.subset_xarray.py
    """
    check_time_extent(ds, tstart, tstop)

    # timestamps in the dataset that enclose the requested period
    time_lower = ds.sel(time=tstart, method="pad").time.values
    time_upper = ds.sel(time=tstop, method="backfill").time.values

    return ds.sel(time=slice(time_lower, time_upper))


def interp_regularnc_to_plipointsDataset(data_xr_reg, gdf_points, load=True):

ncbnd_construct = get_ncbnd_construct()
Expand Down
1 change: 1 addition & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Feat
- optimized performance for getting CMEMS time extents and spatial buffer in [#1059](https://github.com/Deltares/dfm_tools/pull/1059)
- replaced buffer and floor/ceil with copernicusmarine `coordinates_selection_method`, this deprecated the `buffer` argument for `dfmt.download_CMEMS()` [#1061](https://github.com/Deltares/dfm_tools/pull/1061)
- inclusive selection of outside timesteps in `open_prepare_dataset()` and thus in `cmems_nc_to_bc()` in [#1062](https://github.com/Deltares/dfm_tools/pull/1062)


## 0.32.0 (2025-01-14)
Expand Down
52 changes: 51 additions & 1 deletion tests/test_interpolate_grid2bnd.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import pytest
import dfm_tools as dfmt
import numpy as np
import pandas as pd
import datetime as dt
import xarray as xr
import shapely
Expand All @@ -22,6 +23,7 @@
ds_apply_conventions,
ds_apply_conversion_dict,
open_prepare_dataset,
_ds_sel_time_outside,
)
from dfm_tools.hydrolib_helpers import get_ncbnd_construct
import hydrolib.core.dflowfm as hcdfm
Expand Down Expand Up @@ -248,6 +250,54 @@ def test_plipointsDataset_to_ForcingModel_drop_allnan_points():
assert forcingmodel_object.forcing[1].name == 'abc_bnd_0004'


def test_ds_sel_time_outside():
    """
    Check that `_ds_sel_time_outside()` returns a selection that encloses the
    requested tstart/tstop, and that it raises an `OutOfRangeError` when
    tstart or tstop is outside of the time extent of the dataset.
    """
    ds = cmems_dataset_4times()

    # (description, tstart, tstop, expected number of selected timesteps)
    valid_cases = [
        ("exact outer bounds", "2019-12-31 12:00", "2020-01-03 12:00", 4),
        ("exact inner bounds", "2020-01-01 12:00", "2020-01-02 12:00", 2),
        ("inexact inner bounds", "2020-01-01", "2020-01-03", 4),
    ]
    for description, tstart, tstop, num_times in valid_cases:
        ds_sel = _ds_sel_time_outside(ds, tstart, tstop)
        times = ds_sel.time.values
        # first/last timestamps are on or outside the requested bounds,
        # the second/second-last timestamps are strictly inside of them
        assert times[0] <= pd.Timestamp(tstart), description
        assert times[1] > pd.Timestamp(tstart), description
        assert times[-2] < pd.Timestamp(tstop), description
        assert times[-1] >= pd.Timestamp(tstop), description
        assert len(ds_sel.time) == num_times, description

    # (description, tstart, tstop, expected part of the error message)
    out_of_bounds_cases = [
        (
            "tstart out of bounds",
            "2019-12-30 12:00",
            "2020-01-03 12:00",
            "requested tstart 2019-12-30 12:00:00 outside",
        ),
        (
            "tstop out of bounds",
            "2019-12-31 12:00",
            "2030-01-03 12:00",
            "requested tstop 2030-01-03 12:00:00 outside",
        ),
    ]
    for description, tstart, tstop, expected_msg in out_of_bounds_cases:
        with pytest.raises(OutOfRangeError) as e:
            _ds_sel_time_outside(ds, tstart, tstop)
        # checked after the with-block, since the raising call ends the block
        assert expected_msg in str(e.value), description


@pytest.mark.systemtest
def test_open_prepare_dataset_correctdepths(tmp_path):
"""
Expand All @@ -258,7 +308,7 @@ def test_open_prepare_dataset_correctdepths(tmp_path):
file_nc = tmp_path / 'temp_cmems_dummydata.nc'
ds_moretime.to_netcdf(file_nc)

ds_moretime_import = open_prepare_dataset(dir_pattern=file_nc, quantity='salinitybnd', tstart='2020-01-01 12:00:00', tstop='2020-01-02 12:00:00')
ds_moretime_import = open_prepare_dataset(dir_pattern=file_nc, quantity='salinitybnd', tstart='2020-01-01 12:00', tstop='2020-01-02 12:00')
assert len(ds_moretime_import.time) == 2


Expand Down

0 comments on commit af2ea58

Please sign in to comment.