def _ds_sel_time_outside(ds: xr.Dataset, tstart, tstop) -> xr.Dataset:
    """
    Subset the dataset on time, making sure the requested times are always
    included. If there is no exact match for start/end times, the previous/next
    timestamp is taken as an extreme.
    Inspired by copernicusmarine.download_functions.subset_xarray.py

    Parameters
    ----------
    ds : xr.Dataset
        Dataset with a `time` coordinate that supports label-based selection.
    tstart, tstop
        Requested start and stop time, passed as labels to the `time`
        selection (the tests for this function pass date strings).

    Returns
    -------
    xr.Dataset
        The part of `ds` between the last timestamp at or before `tstart` and
        the first timestamp at or after `tstop`, both included.

    Raises
    ------
    Whatever `check_time_extent()` raises if the requested period is not
    covered by `ds` (presumably `OutOfRangeError` -- verify against
    `check_time_extent`).
    """
    # fail early with a clear message, otherwise the pad/backfill lookups
    # below would raise a less informative KeyError
    check_time_extent(ds, tstart, tstop)

    # only the timestamps are needed to build the slice, so look them up on
    # the time coordinate instead of indexing every data variable
    time_coord = ds["time"]
    # "pad": last available timestamp at or before tstart
    time_first = time_coord.sel(time=tstart, method="pad").values
    # "backfill": first available timestamp at or after tstop
    time_last = time_coord.sel(time=tstop, method="backfill").values

    # label-based slices are inclusive on both ends
    return ds.sel(time=slice(time_first, time_last))
def test_ds_sel_time_outside():
    """
    Check that `_ds_sel_time_outside()` returns the timesteps enclosing the
    requested period, and that a period outside the dataset raises
    `OutOfRangeError`.
    """
    ds = cmems_dataset_4times()

    # (tstart, tstop, expected number of selected timesteps)
    valid_cases = [
        # exact outer bounds
        ("2019-12-31 12:00", "2020-01-03 12:00", 4),
        # exact inner bounds
        ("2020-01-01 12:00", "2020-01-02 12:00", 2),
        # inexact inner bounds
        ("2020-01-01", "2020-01-03", 4),
    ]
    for tstart, tstop, ntimes in valid_cases:
        ds_sel = _ds_sel_time_outside(ds, tstart, tstop)
        times = ds_sel.time.values
        # first/last timestep enclose the requested period ...
        assert times[0] <= pd.Timestamp(tstart)
        assert times[-1] >= pd.Timestamp(tstop)
        # ... and no more than one timestep lies outside it on either side
        assert times[1] > pd.Timestamp(tstart)
        assert times[-2] < pd.Timestamp(tstop)
        assert len(ds_sel.time) == ntimes

    # tstart out of bounds
    tstart = "2019-12-30 12:00"
    tstop = "2020-01-03 12:00"
    with pytest.raises(OutOfRangeError) as e:
        _ds_sel_time_outside(ds, tstart, tstop)
    # checked after the context manager, statements following the raising
    # call inside the with-block would never run
    assert "requested tstart 2019-12-30 12:00:00 outside" in str(e.value)

    # tstop out of bounds
    tstart = "2019-12-31 12:00"
    tstop = "2030-01-03 12:00"
    with pytest.raises(OutOfRangeError) as e:
        _ds_sel_time_outside(ds, tstart, tstop)
    assert "requested tstop 2030-01-03 12:00:00 outside" in str(e.value)
open_prepare_dataset(dir_pattern=file_nc, quantity='salinitybnd', tstart='2020-01-01 12:00', tstop='2020-01-02 12:00') assert len(ds_moretime_import.time) == 2