Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,15 @@ Bug fixes
- Fix error when encoding an empty :py:class:`numpy.datetime64` array
(:issue:`10722`, :pull:`10723`). By `Spencer Clark
<https://github.com/spencerkclark>`_.
- Propagate coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`).
- Fix error from ``to_netcdf(..., compute=False)`` when using Dask Distributed
(:issue:`10725`).
By `Stephan Hoyer <https://github.com/shoyer>`_.
- Propagate coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`).
By `Justus Magin <https://github.com/keewis>`_.
- Allow ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return
non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`).
By `Maximilian Roos <https://github.com/max-sixty>`_.

Documentation
~~~~~~~~~~~~~
Expand Down
19 changes: 18 additions & 1 deletion xarray/core/utils.py
Original file line number Diff line number Diff line change
def equivalent(first: T, second: T) -> bool:
    """Compare two objects for equivalence (identity or equality).

    The comparison proceeds in this order:

    1. Identical objects (``first is second``) are equivalent.
    2. If either object is a ``numpy.ndarray``, the result of
       ``duck_array_ops.array_equiv`` is returned.
    3. If either object is a ``list``, the result of ``list_equiv`` is
       returned.
    4. Otherwise ``first == second`` is evaluated. A ``bool`` or
       ``numpy.bool_`` result is used as-is; any other result type
       (e.g. an array, Series, Dataset, string or number) is treated as
       "not equivalent" instead of being coerced to a truth value.
    5. Objects that compare unequal are still equivalent when
       ``pd.isnull`` reports both of them as null (e.g. two NaN values,
       including numpy scalar NaNs).

    Parameters
    ----------
    first, second
        The objects to compare.

    Returns
    -------
    bool
        True if the objects are considered equivalent, False otherwise.
    """
    if first is second:
        return True

    if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
        # Imported lazily, and only on the branch that needs it.
        # TODO: refactor to avoid circular import
        from xarray.core import duck_array_ops

        return duck_array_ops.array_equiv(first, second)

    if isinstance(first, list) or isinstance(second, list):
        return list_equiv(first, second)  # type: ignore[arg-type]

    # For non-array/list types, use == but require a boolean result.
    result = first == second
    if isinstance(result, np.bool_):
        # Normalise numpy bool scalars so they take the same path as
        # builtin bools, including the null check below.
        result = bool(result)
    elif not isinstance(result, bool):
        # Reject any other non-boolean type rather than guessing its truth
        # value (which may be ambiguous or raise).
        return False

    if result:
        return True

    # Unequal values are still equivalent when both are null (NaN != NaN).
    return bool(pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]


def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
Expand Down
43 changes: 30 additions & 13 deletions xarray/structure/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,6 +607,25 @@ def merge_coords(
return variables, out_indexes


def equivalent_attrs(a: Any, b: Any) -> bool:
    """Return whether two attribute values are equivalent, never raising
    ``ValueError`` or ``TypeError``.

    This delegates to ``equivalent``. If that comparison raises
    ``ValueError`` or ``TypeError`` the two values are reported as not
    equivalent, so callers can treat "cannot be compared" the same way as
    "different".
    """
    try:
        outcome = equivalent(a, b)
    except (ValueError, TypeError):
        # The comparison could not be evaluated at all; treat it as a
        # mismatch instead of propagating the error.
        return False
    return outcome


def merge_attrs(variable_attrs, combine_attrs, context=None):
"""Combine attributes from different variables according to combine_attrs"""
if not variable_attrs:
Expand All @@ -633,20 +652,18 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
elif combine_attrs == "drop_conflicts":
result = {}
dropped_keys = set()

for attrs in variable_attrs:
result.update(
{
key: value
for key, value in attrs.items()
if key not in result and key not in dropped_keys
}
)
result = {
key: value
for key, value in result.items()
if key not in attrs or equivalent(attrs[key], value)
}
dropped_keys |= {key for key in attrs if key not in result}
for key, value in attrs.items():
if key in dropped_keys:
continue

if key not in result:
result[key] = value
elif not equivalent_attrs(result[key], value):
del result[key]
dropped_keys.add(key)

return result
elif combine_attrs == "identical":
result = dict(variable_attrs[0])
Expand Down
264 changes: 264 additions & 0 deletions xarray/tests/test_merge.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
from __future__ import annotations

import warnings

import numpy as np
import pandas as pd
import pytest

import xarray as xr
Expand Down Expand Up @@ -235,6 +238,267 @@ def test_merge_attrs_drop_conflicts(self):
expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
assert_identical(actual, expected)

def test_merge_attrs_drop_conflicts_numpy_arrays(self):
    """Array-valued attrs are kept by ``drop_conflicts`` only if values match."""
    base = xr.Dataset(attrs={"arr": np.array([1, 2, 3]), "scalar": 1})
    same_values = xr.Dataset(attrs={"arr": np.array([1, 2, 3]), "scalar": 1})
    other_values = xr.Dataset(attrs={"arr": np.array([4, 5, 6]), "other": 2})

    # Distinct array objects holding the same values: "arr" is expected to stay.
    merged = xr.merge([base, same_values], combine_attrs="drop_conflicts")
    assert "arr" in merged.attrs
    assert merged.attrs["scalar"] == 1

    # Arrays holding different values: "arr" is expected to be dropped.
    merged = xr.merge([base, other_values], combine_attrs="drop_conflicts")
    assert "arr" not in merged.attrs
    assert "other" in merged.attrs

def test_merge_attrs_drop_conflicts_custom_eq_returns_array(self):
    """Attrs whose ``__eq__`` returns an array are dropped by ``drop_conflicts``."""

    class CustomEq:
        """Wrapper whose ``__eq__`` answers with a one-element array."""

        def __init__(self, value):
            self.value = value

        def __eq__(self, other):
            if isinstance(other, CustomEq):
                # Deliberately not a plain bool.
                return np.array([self.value == other.value])
            return False

        def __repr__(self):
            return f"CustomEq({self.value})"

    first = xr.Dataset(attrs={"custom": CustomEq(42), "x": 1})
    same_value = xr.Dataset(attrs={"custom": CustomEq(42), "x": 1})
    other_value = xr.Dataset(attrs={"custom": CustomEq(99), "y": 2})

    # DeprecationWarnings (e.g. from older numpy when arrays are used in a
    # boolean context) are not what this test is about, so silence them.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=DeprecationWarning)

        # Same wrapped value: the comparison result is still an array, so
        # the attribute is expected to be dropped.
        merged = xr.merge([first, same_value], combine_attrs="drop_conflicts")
        assert "custom" not in merged.attrs
        assert merged.attrs["x"] == 1

        # Different wrapped value: dropped as well; the unrelated attrs
        # from both inputs are kept.
        merged = xr.merge([first, other_value], combine_attrs="drop_conflicts")
        assert "custom" not in merged.attrs
        assert merged.attrs["x"] == 1
        assert merged.attrs["y"] == 2

def test_merge_attrs_drop_conflicts_ambiguous_array_returns(self):
    """Attrs whose ``__eq__`` returns an empty or mixed array are dropped."""

    class EmptyArrayEq:
        """``__eq__`` returns an empty array for instances of the same class."""

        def __eq__(self, other):
            if isinstance(other, EmptyArrayEq):
                return np.array([])
            return False

        def __repr__(self):
            return "EmptyArrayEq()"

    class MultiArrayEq:
        """``__eq__`` returns a two-element mixed array for same-class instances."""

        def __eq__(self, other):
            if isinstance(other, MultiArrayEq):
                return np.array([True, False])
            return False

        def __repr__(self):
            return "MultiArrayEq()"

    # In both cases the attribute is expected to be dropped rather than the
    # merge raising an error.
    for key, factory in (("empty", EmptyArrayEq), ("multi", MultiArrayEq)):
        left_value, right_value = factory(), factory()
        left = xr.Dataset(attrs={key: left_value})
        right = xr.Dataset(attrs={key: right_value})

        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=DeprecationWarning)
            merged = xr.merge([left, right], combine_attrs="drop_conflicts")
            assert key not in merged.attrs

def test_merge_attrs_drop_conflicts_all_true_array(self):
    """An all-True multi-element array from ``__eq__`` still drops the attr."""

    class AllTrueArrayEq:
        """``__eq__`` returns ``[True, True, True]`` for same-class instances."""

        def __eq__(self, other):
            if isinstance(other, AllTrueArrayEq):
                return np.array([True, True, True])
            return False

        def __repr__(self):
            return "AllTrueArrayEq()"

    left = xr.Dataset(attrs={"alltrue": AllTrueArrayEq()})
    right = xr.Dataset(attrs={"alltrue": AllTrueArrayEq()})

    # Every element is True, but the result is not a boolean, so the
    # attribute is expected to be dropped.
    merged = xr.merge([left, right], combine_attrs="drop_conflicts")
    assert "alltrue" not in merged.attrs

def test_merge_attrs_drop_conflicts_nested_arrays(self):
    """Object arrays wrapping nested arrays are dropped by ``drop_conflicts``."""

    def boxed(inner):
        # One-element object array whose single item is itself an array.
        holder = np.array([None], dtype=object)
        holder[0] = inner
        return holder

    base = xr.Dataset(attrs={"nested_array": boxed(np.arange(3)), "common": 1})
    different = xr.Dataset(
        attrs={"nested_array": boxed(np.arange(10, 13)), "common": 1}
    )

    # Nested arrays with different contents: the attribute is dropped.
    merged = xr.merge([base, different], combine_attrs="drop_conflicts")
    assert "nested_array" not in merged.attrs
    assert merged.attrs["common"] == 1

    # Nested arrays with the same contents are expected to be dropped too;
    # the test pins that outcome rather than an error being raised.
    same_contents = xr.Dataset(
        attrs={"nested_array": boxed(np.arange(3)), "other": 2}
    )
    merged = xr.merge([base, same_contents], combine_attrs="drop_conflicts")
    assert "nested_array" not in merged.attrs
    assert merged.attrs["other"] == 2

def test_merge_attrs_drop_conflicts_dataset_attrs(self):
    """``xarray.Dataset`` objects used as attr values are always dropped."""
    base = xr.Dataset(attrs={"dataset_attr": xr.Dataset({"foo": 1}), "scalar": 42})
    different = xr.Dataset(
        attrs={"dataset_attr": xr.Dataset({"bar": 1}), "scalar": 42}
    )
    same_contents = xr.Dataset(
        attrs={"dataset_attr": xr.Dataset({"foo": 1}), "other": 99}
    )

    # Datasets with different contents: attribute dropped, merge succeeds.
    merged = xr.merge([base, different], combine_attrs="drop_conflicts")
    assert "dataset_attr" not in merged.attrs
    assert merged.attrs["scalar"] == 42

    # Datasets with the same contents are expected to be dropped as well.
    merged = xr.merge([base, same_contents], combine_attrs="drop_conflicts")
    assert "dataset_attr" not in merged.attrs
    assert merged.attrs["other"] == 99

def test_merge_attrs_drop_conflicts_pandas_series(self):
    """``pandas.Series`` objects used as attr values are always dropped."""
    base = xr.Dataset(attrs={"series": pd.Series([1, 2]), "value": "a"})
    different = xr.Dataset(attrs={"series": pd.Series([3, 4]), "value": "a"})
    same_values = xr.Dataset(attrs={"series": pd.Series([1, 2]), "value": "a"})

    # Warnings that pandas comparisons may emit are not under test here.
    with warnings.catch_warnings():
        for category in (DeprecationWarning, FutureWarning):
            warnings.filterwarnings("ignore", category=category)

        # Series with different values: attribute dropped, merge succeeds.
        merged = xr.merge([base, different], combine_attrs="drop_conflicts")
        assert "series" not in merged.attrs
        assert merged.attrs["value"] == "a"

        # Series with the same values are expected to be dropped as well.
        merged = xr.merge([base, same_values], combine_attrs="drop_conflicts")
        assert "series" not in merged.attrs
        assert merged.attrs["value"] == "a"

def test_merge_attrs_drop_conflicts_eq_returns_string(self):
    """Attrs whose ``__eq__`` returns a string are dropped by ``drop_conflicts``."""

    class ReturnsString:
        """``__eq__`` always answers with a (truthy) string, never a bool."""

        def __init__(self, value):
            self.value = value

        def __eq__(self, other):
            return "comparison result"

    left = xr.Dataset(attrs={"obj": ReturnsString("A")})
    right = xr.Dataset(attrs={"obj": ReturnsString("B")})

    merged = xr.merge([left, right], combine_attrs="drop_conflicts")

    # A non-boolean comparison result is expected to count as a conflict.
    assert "obj" not in merged.attrs

def test_merge_attrs_drop_conflicts_eq_returns_number(self):
    """Attrs whose ``__eq__`` returns a number are dropped by ``drop_conflicts``."""

    class ReturnsZero:
        """``__eq__`` always answers with the integer 0, never a bool."""

        def __init__(self, value):
            self.value = value

        def __eq__(self, other):
            return 0

    # Two distinct objects wrapping the same value.
    left = xr.Dataset(attrs={"zero": ReturnsZero("same")})
    right = xr.Dataset(attrs={"zero": ReturnsZero("same")})

    merged = xr.merge([left, right], combine_attrs="drop_conflicts")

    # A non-boolean comparison result is expected to count as a conflict.
    assert "zero" not in merged.attrs

def test_merge_attrs_no_conflicts_compat_minimal(self):
"""make sure compat="minimal" does not silence errors"""
ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})
Expand Down
Loading