Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,9 @@ New Features
- ``compute=False`` is now supported by :py:meth:`DataTree.to_netcdf` and
:py:meth:`DataTree.to_zarr`.
By `Stephan Hoyer <https://github.com/shoyer>`_.
- ``.sel`` operations now support the ``method`` and ``tolerance`` keyword arguments,
for the case of indexing with a slice.
By `Tom Nicholas <https://github.com/TomNicholas>`_.
- ``open_dataset`` will now correctly infer a path ending in ``.zarr/`` as zarr
By `Ian Hunt-Isaak <https://github.com/ianhi>`_.

Expand Down
69 changes: 54 additions & 15 deletions xarray/core/indexes.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,23 +560,62 @@ def _sanitize_slice_element(x):


def _query_slice(index, label, coord_name="", method=None, tolerance=None):
slice_label_start = _sanitize_slice_element(label.start)
slice_label_stop = _sanitize_slice_element(label.stop)
slice_index_step = _sanitize_slice_element(label.step)

if method is not None or tolerance is not None:
raise NotImplementedError(
"cannot use ``method`` argument if any indexers are slice objects"
)
indexer = index.slice_indexer(
_sanitize_slice_element(label.start),
_sanitize_slice_element(label.stop),
_sanitize_slice_element(label.step),
)
if not isinstance(indexer, slice):
# unlike pandas, in xarray we never want to silently convert a
# slice indexer into an array indexer
raise KeyError(
"cannot represent labeled-based slice indexer for coordinate "
f"{coord_name!r} with a slice over integer positions; the index is "
"unsorted or non-unique"
# `pandas.Index.slice_indexer` doesn't support method or tolerance (see https://github.com/pydata/xarray/issues/10710)

if index.has_duplicates:
# `pandas.Index.get_indexer` disallows this, see https://github.com/pydata/xarray/pull/10711#discussion_r2331297608
raise NotImplementedError(
"cannot use ``method`` argument with a slice object as an indexer and an index with non-unique values"
)

if method is None and tolerance is not None:
# copies default behaviour of slicing with no tolerance, which is to be exclusive at both ends
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the default is inclusive?

Suggested change
# copies default behaviour of slicing with no tolerance, which is to be exclusive at both ends
# copies default behaviour of slicing with no tolerance, which is to be inclusive at both ends

slice_index_start = index.get_indexer(
[slice_label_start], method="backfill", tolerance=tolerance
)
slice_index_stop = index.get_indexer(
[slice_label_stop], method="pad", tolerance=tolerance
)
else:
# minor optimization to only issue a single `.get_indexer` call to get both start and end
slice_index_start, slice_index_stop = index.get_indexer(
[slice_label_start, slice_label_stop],
method=method,
tolerance=tolerance,
)

if -1 in [slice_index_start, slice_index_stop]:
# how pandas indicates the "no match" case - we return empty slice
indexer = slice(0, 0)
else:
# +1 needed to emulate behaviour of xarray sel with slice without method kwarg, which is inclusive of point at stop label
# assumes no duplicates, but we have forbidden that case above
indexer = slice(
slice_index_start.item(),
slice_index_stop.item() + 1,
slice_index_step,
)
else:
indexer = index.slice_indexer(
slice_label_start,
slice_label_stop,
slice_index_step,
)

if not isinstance(indexer, slice):
# unlike pandas, in xarray we never want to silently convert a
# slice indexer into an array indexer
raise KeyError(
"cannot represent labeled-based slice indexer for coordinate "
f"{coord_name!r} with a slice over integer positions; the index is "
"unsorted or non-unique"
)

return indexer


Expand Down
113 changes: 111 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -2164,8 +2164,13 @@ def test_sel_method(self) -> None:
actual = data.sel(dim2=[1.45], method="backfill")
assert_identical(expected, actual)

with pytest.raises(NotImplementedError, match=r"slice objects"):
data.sel(dim2=slice(1, 3), method="ffill")
expected = data.isel(dim2=slice(2, 7))
actual = data.sel(dim2=slice(1, 3), method="ffill")
assert_identical(expected, actual)

expected = data.isel(dim2=slice(2, 7, 2))
actual = data.sel(dim2=slice(1, 3, 2), method="ffill")
assert_identical(expected, actual)
Comment on lines +2167 to +2173
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

add a test for method='backfill'?


with pytest.raises(TypeError, match=r"``method``"):
# this should not pass silently
Expand All @@ -2175,6 +2180,110 @@ def test_sel_method(self) -> None:
with pytest.raises(ValueError, match=r"cannot supply"):
data.sel(dim1=0, method="nearest")

def test_sel_method_with_slice(self) -> None:
# regression test for https://github.com/pydata/xarray/issues/10710

data_int_coords = xr.Dataset(coords={"lat": ("lat", [20, 21, 22, 23])})
expected = xr.Dataset(coords={"lat": ("lat", [21, 22])})
actual = data_int_coords.sel(lat=slice(21, 22), method="nearest")
assert_identical(expected, actual)

# check non-zero step
expected = xr.Dataset(coords={"lat": ("lat", [21])})
actual = data_int_coords.sel(lat=slice(21, 22, 2), method="nearest")
assert_identical(expected, actual)

# check consistency with not passing method kwarg, for case of ints, where method kwarg should be irrelevant
expected = data_int_coords.sel(lat=slice(21, 22))
actual = data_int_coords.sel(lat=slice(21, 22), method="nearest")
assert_identical(expected, actual)

data_float_coords = xr.Dataset(
coords={"lat": ("lat", [20.1, 21.1, 22.1, 23.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [21.1, 22.1])})
actual = data_float_coords.sel(lat=slice(21, 22), method="nearest")
assert_identical(expected, actual)

# "no match" case - should return zero-size slice
expected = xr.Dataset(coords={"lat": ("lat", [])})
actual = data_float_coords.sel(
lat=slice(21.5, 21.6), method="nearest", tolerance=1e-3
)
assert_identical(expected, actual)

# test supposed default behaviour
expected = xr.Dataset(coords={"lat": ("lat", [21.1, 22.1])})
actual = data_float_coords.sel(lat=slice(21.0, 22.2))
assert_identical(expected, actual)

# tolerance specified but method not specified
expected = xr.Dataset(coords={"lat": ("lat", [21.1, 22.1])})
actual = data_float_coords.sel(
lat=slice(21.0, 22.2),
tolerance=1.0,
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

tolerance=0.0 might be a cleaner test here

)
assert_identical(expected, actual)
# test this matches default behaviour without tolerance specified
default = data_float_coords.sel(lat=slice(21.0, 22.2))
assert_identical(default, actual)

# "no match" case - should return zero-size slice
expected = xr.Dataset(coords={"lat": ("lat", [])})
actual = data_float_coords.sel(
lat=slice(21.5, 21.6), method="nearest", tolerance=1e-3
)
assert_identical(expected, actual)

# non-unique coordinate values
data_non_unique = xr.Dataset(
coords={"lat": ("lat", [20.1, 21.1, 21.1, 22.1, 22.1, 23.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [21.1, 21.1, 22.1, 22.1])})
with pytest.raises(
NotImplementedError,
match="slice object as an indexer and an index with non-unique values",
):
data_non_unique.sel(lat=slice(21.0, 22.2), method="nearest")

# check non-zero step
data_float_coords = xr.Dataset(
coords={"lat": ("lat", [20.1, 21.1, 22.1, 23.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [21.1])})
actual = data_float_coords.sel(lat=slice(21, 22, 2), method="nearest")
assert_identical(expected, actual)

# backwards slices
data_int_coords = xr.Dataset(coords={"lat": ("lat", [23, 22, 21, 20])})
expected = xr.Dataset(coords={"lat": ("lat", [22, 21])})
actual = data_int_coords.sel(lat=slice(22, 21), method="nearest")
assert_identical(expected, actual)

data_float_coords = xr.Dataset(
coords={"lat": ("lat", [23.1, 22.1, 21.1, 20.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [22.1, 21.1])})
actual = data_float_coords.sel(lat=slice(22, 21), method="nearest")
assert_identical(expected, actual)

def test_sel_negative_slices(self) -> None:
data_int_coords = xr.Dataset(coords={"lat": ("lat", [-23, -22, -21, -20])})
expected = xr.Dataset(coords={"lat": ("lat", [-22, -21])})
actual = data_int_coords.sel(lat=slice(-22, -21))
assert_identical(expected, actual)

expected = xr.Dataset(coords={"lat": ("lat", [-22, -21])})
actual = data_int_coords.sel(lat=slice(-22, -21), method="nearest")
assert_identical(expected, actual)

data_float_coords = xr.Dataset(
coords={"lat": ("lat", [-23.1, -22.1, -21.1, -20.1])}
)
expected = xr.Dataset(coords={"lat": ("lat", [-22.1, -21.1])})
actual = data_float_coords.sel(lat=slice(-22, -21), method="nearest")
assert_identical(expected, actual)

def test_loc(self) -> None:
data = create_test_data()
expected = data.sel(dim3="a")
Expand Down
Loading