Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@ Deprecations

Bug Fixes
~~~~~~~~~
- Fix h5netcdf backend for format=None, use same rule as netcdf4 backend (:pull:`10859`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_
- ``netcdf4`` and ``pydap`` backends now use stricter URL detection to avoid incorrectly claiming
remote URLs. The ``pydap`` backend now only claims URLs with explicit DAP protocol indicators
(``dap2://`` or ``dap4://`` schemes, or ``/dap2/`` or ``/dap4/`` in the URL path). This prevents
both backends from claiming remote Zarr stores and other non-DAP URLs without an explicit
``engine=`` argument. (:pull:`10804`). By `Ian Hunt-Isaak <https://github.com/ianhi>`_.
- Fix indexing with empty arrays for scipy & h5netcdf backends which now resolves to empty slices (:issue:`10867`, :pull:`10870`).
By `Kai Mühlbauer <https://github.com/kmuehlbauer>`_
- Fix error handling issue in ``decode_cf_variables`` when decoding fails - the exception is now re-raised
correctly, with a note added about the variable name that caused the error (:issue:`10873`, :pull:`10886`).
By `Jonas L. Bertelsen <https://github.com/jonaslb>`_
- When assigning an indexed coordinate to a data variable or coordinate, coerce it from
``IndexVariable`` to ``Variable`` (:issue:`9859`, :issue:`10829`, :pull:`10909`)
By `Julia Signell <https://github.com/jsignell>`_

- The NetCDF4 backend will now claim to be able to read any URL except for one that contains
the substring zarr. This restores backward compatibility after
Expand Down
2 changes: 1 addition & 1 deletion xarray/core/coordinates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1265,7 +1265,7 @@ def create_coords_with_default_indexes(
variables.update(idx_vars)
all_variables.update(idx_vars)
else:
variables[name] = variable
variables[name] = variable.to_base_variable()

new_coords = Coordinates._construct_direct(coords=variables, indexes=indexes)

Expand Down
13 changes: 11 additions & 2 deletions xarray/structure/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,12 @@
emit_user_level_warning,
equivalent,
)
from xarray.core.variable import Variable, as_variable, calculate_dimensions
from xarray.core.variable import (
IndexVariable,
Variable,
as_variable,
calculate_dimensions,
)
from xarray.structure.alignment import deep_align
from xarray.util.deprecation_helpers import (
_COMPAT_DEFAULT,
Expand Down Expand Up @@ -1206,7 +1211,11 @@ def dataset_update_method(dataset: Dataset, other: CoercibleMapping) -> _MergeRe
if c not in value.dims and c in dataset.coords
]
if coord_names:
other[key] = value.drop_vars(coord_names)
value = value.drop_vars(coord_names)
if isinstance(value.variable, IndexVariable):
variable = value.variable.to_base_variable()
value = value._replace(variable=variable)
other[key] = value

return merge_core(
[dataset, other],
Expand Down
52 changes: 28 additions & 24 deletions xarray/testing/assertions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import functools
import warnings
from collections.abc import Hashable
from typing import Any

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -362,6 +363,17 @@ def _assert_indexes_invariants_checks(
if isinstance(v, IndexVariable)
}
assert indexes.keys() <= index_vars, (set(indexes), index_vars)
assert all(
k in index_vars
for k, v in possible_coord_variables.items()
if v.dims == (k,)
), {k: type(v) for k, v in possible_coord_variables.items()}

assert not any(
isinstance(v, IndexVariable)
for k, v in possible_coord_variables.items()
if v.dims == (k,) and k not in indexes.keys()
), {k: type(v) for k, v in possible_coord_variables.items()}

# check pandas index wrappers vs. coordinate data adapters
for k, index in indexes.items():
Expand Down Expand Up @@ -401,11 +413,17 @@ def _assert_indexes_invariants_checks(
)


def _assert_variable_invariants(var: Variable, name: Hashable = None):
def _assert_variable_invariants(
var: Variable | Any,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The check now happens within this function.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with https://github.com/pydata/xarray/pull/10909/files#r2528970158, it may be better to keep the IndexVariable vs Variable check at the level of Dataset and/or DataArray invariants.

Actually, the right place should probably be _assert_indexes_invariants_checks, where we could add the check that non-index variables are not instances of IndexVariable.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I moved the IndexVariable check, but I still think it is nicer to keep the Variable check within this function. Let me know if you'd still rather I move it out.

name: Hashable = None,
) -> None:
if name is None:
name_or_empty: tuple = ()
else:
name_or_empty = (name,)

assert isinstance(var, Variable), {name: type(var)}

assert isinstance(var._dims, tuple), name_or_empty + (var._dims,)
assert len(var._dims) == len(var._data.shape), name_or_empty + (
var._dims,
Expand All @@ -418,35 +436,28 @@ def _assert_variable_invariants(var: Variable, name: Hashable = None):


def _assert_dataarray_invariants(da: DataArray, check_default_indexes: bool):
assert isinstance(da._variable, Variable), da._variable
_assert_variable_invariants(da._variable)

assert isinstance(da._coords, dict), da._coords
assert all(isinstance(v, Variable) for v in da._coords.values()), da._coords

if check_default_indexes:
assert all(set(v.dims) <= set(da.dims) for v in da._coords.values()), (
da.dims,
{k: v.dims for k, v in da._coords.items()},
)
assert all(
isinstance(v, IndexVariable)
for (k, v) in da._coords.items()
if v.dims == (k,)
), {k: type(v) for k, v in da._coords.items()}

for k, v in da._coords.items():
_assert_variable_invariants(v, k)

if da._indexes is not None:
_assert_indexes_invariants_checks(
da._indexes, da._coords, da.dims, check_default=check_default_indexes
)
assert da._indexes is not None
_assert_indexes_invariants_checks(
da._indexes, da._coords, da.dims, check_default=check_default_indexes
)


def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool):
assert isinstance(ds._variables, dict), type(ds._variables)
assert all(isinstance(v, Variable) for v in ds._variables.values()), ds._variables

for k, v in ds._variables.items():
_assert_variable_invariants(v, k)

Expand All @@ -466,17 +477,10 @@ def _assert_dataset_invariants(ds: Dataset, check_default_indexes: bool):
ds._dims[k] == v.sizes[k] for v in ds._variables.values() for k in v.sizes
), (ds._dims, {k: v.sizes for k, v in ds._variables.items()})

if check_default_indexes:
assert all(
isinstance(v, IndexVariable)
for (k, v) in ds._variables.items()
if v.dims == (k,)
), {k: type(v) for k, v in ds._variables.items() if v.dims == (k,)}

if ds._indexes is not None:
_assert_indexes_invariants_checks(
ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes
)
assert ds._indexes is not None
_assert_indexes_invariants_checks(
ds._indexes, ds._variables, ds._dims, check_default=check_default_indexes
)

assert isinstance(ds._encoding, type(None) | dict)
assert isinstance(ds._attrs, type(None) | dict)
Expand Down
14 changes: 14 additions & 0 deletions xarray/tests/test_dataarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -1702,6 +1702,20 @@ def should_add_coord_to_array(self, name, var, dims):
assert_identical(actual, expected, check_default_indexes=False)
assert "x_bnds" not in actual.dims

def test_assign_coords_uses_base_variable_class(self) -> None:
a = DataArray([0, 1, 3], dims=["x"], coords={"x": [0, 1, 2]})
a = a.assign_coords(foo=a.x)

# explicit check
assert isinstance(a["x"].variable, IndexVariable)
assert not isinstance(a["foo"].variable, IndexVariable)

# test internal invariant checks when comparing the datasets
expected = DataArray(
[0, 1, 3], dims=["x"], coords={"x": [0, 1, 2], "foo": ("x", [0, 1, 2])}
)
assert_identical(a, expected)

def test_coords_alignment(self) -> None:
lhs = DataArray([1, 2, 3], [("x", [0, 1, 2])])
rhs = DataArray([2, 3, 4], [("x", [1, 2, 3])])
Expand Down
18 changes: 16 additions & 2 deletions xarray/tests/test_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -4311,9 +4311,11 @@ def test_to_stacked_array_preserves_dtype(self) -> None:
# coordinate created from variables names should be of string dtype
data = np.array(["a", "a", "a", "b"], dtype="<U1")
expected_stacked_variable = DataArray(name="variable", data=data, dims="z")

# coerce from `IndexVariable` to `Variable` before comparing
assert_identical(
stacked.coords["variable"].drop_vars(["z", "variable", "y"]),
expected_stacked_variable,
stacked["variable"].variable.to_base_variable(),
expected_stacked_variable.variable,
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This was the only test change that was needed. Basically stacked["variable"].variable is an IndexVariable because it is represents a coord that is part of a MultiIndex. But you can't know that if you only have access to the DataArray with no coords. So I just switched it to be comparing variable and to coerce to the Variable.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah multi-indexes are where IndexVariable really bites us

e.g. #8887

)

def test_to_stacked_array_transposed(self) -> None:
Expand Down Expand Up @@ -4779,6 +4781,18 @@ def test_setitem_using_list_errors(self, var_list, data, error_regex) -> None:
with pytest.raises(ValueError, match=error_regex):
actual[var_list] = data

def test_setitem_uses_base_variable_class_even_for_index_variables(self) -> None:
ds = Dataset(coords={"x": [1, 2, 3]})
ds["y"] = ds["x"]

# explicit check
assert isinstance(ds["x"].variable, IndexVariable)
assert not isinstance(ds["y"].variable, IndexVariable)

# test internal invariant checks when comparing the datasets
expected = Dataset(data_vars={"y": ("x", [1, 2, 3])}, coords={"x": [1, 2, 3]})
assert_identical(ds, expected)

def test_assign(self) -> None:
ds = Dataset()
actual = ds.assign(x=[0, 1, 2], y=2)
Expand Down
Loading