diff --git a/doc/whats-new.rst b/doc/whats-new.rst
index acb81f3692a..f66d538ad0a 100644
--- a/doc/whats-new.rst
+++ b/doc/whats-new.rst
@@ -34,11 +34,14 @@ Bug fixes
 - Fix error when encoding an empty :py:class:`numpy.datetime64` array
   (:issue:`10722`, :pull:`10723`). By `Spencer Clark
   <https://github.com/spencerkclark>`_.
 - Fix error from ``to_netcdf(..., compute=False)`` when using Dask Distributed
   (:issue:`10725`).
   By `Stephan Hoyer <https://github.com/shoyer>`_.
 - Propagation coordinate attrs in :py:meth:`xarray.Dataset.map` (:issue:`9317`, :pull:`10602`).
   By `Justus Magin <https://github.com/keewis>`_.
+- Allow ``combine_attrs="drop_conflicts"`` to handle objects with ``__eq__`` methods that return
+  non-bool values (e.g., numpy arrays) without raising ``ValueError`` (:pull:`10726`).
+  By `Maximilian Roos <https://github.com/max-sixty>`_.
 
 Documentation
 ~~~~~~~~~~~~~
diff --git a/xarray/core/utils.py b/xarray/core/utils.py
index e490fc05c2f..d44aff9ff36 100644
--- a/xarray/core/utils.py
+++ b/xarray/core/utils.py
@@ -239,17 +239,34 @@ def equivalent(first: T, second: T) -> bool:
     """Compare two objects for equivalence (identity or equality), using
     array_equiv if either object is an ndarray. If both objects are lists,
     equivalent is sequentially called on all the elements.
+
+    If ``==`` returns anything other than a ``bool`` or ``numpy.bool_``
+    (e.g. an array, Series or Dataset), the objects are treated as not
+    equivalent instead of coercing that result to a truth value.
     """
     # TODO: refactor to avoid circular import
     from xarray.core import duck_array_ops
 
     if first is second:
         return True
+
     if isinstance(first, np.ndarray) or isinstance(second, np.ndarray):
         return duck_array_ops.array_equiv(first, second)
+
     if isinstance(first, list) or isinstance(second, list):
         return list_equiv(first, second)  # type: ignore[arg-type]
-    return (first == second) or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
+
+    # For non-array/list types, use == but require boolean result
+    result = first == second
+    if isinstance(result, np.bool_):
+        # Normalize to bool but keep going: np.float64 NaNs need the NaN check
+        result = bool(result)
+    elif not isinstance(result, bool):
+        # Reject any other non-boolean type (Dataset, Series, custom objects, etc.)
+        return False
+
+    # Check for NaN equivalence
+    return result or (pd.isnull(first) and pd.isnull(second))  # type: ignore[call-overload]
 
 
 def list_equiv(first: Sequence[T], second: Sequence[T]) -> bool:
diff --git a/xarray/structure/merge.py b/xarray/structure/merge.py
index 58168ddb024..5bb53036042 100644
--- a/xarray/structure/merge.py
+++ b/xarray/structure/merge.py
@@ -607,6 +607,25 @@ def merge_coords(
     return variables, out_indexes
 
 
+def equivalent_attrs(a: Any, b: Any) -> bool:
+    """Return whether two attribute values can be considered equivalent.
+
+    Delegates to ``equivalent`` and treats a comparison that raises
+    ``ValueError`` or ``TypeError`` as "not equivalent" instead of letting
+    the exception propagate to the caller.
+
+    Such exceptions come from comparisons that cannot be evaluated at all,
+    for example values whose truth value is ambiguous.
+    """
+    try:
+        outcome = equivalent(a, b)
+    except (ValueError, TypeError):
+        # The comparison itself failed, so the values cannot be shown to
+        # match; report them as conflicting.
+        return False
+    return outcome
+
+
 def merge_attrs(variable_attrs, combine_attrs, context=None):
     """Combine attributes from different variables according to combine_attrs"""
     if not variable_attrs:
@@ -633,20 +652,18 @@ def merge_attrs(variable_attrs, combine_attrs, context=None):
     elif combine_attrs == "drop_conflicts":
         result = {}
         dropped_keys = set()
+
         for attrs in variable_attrs:
-            result.update(
-                {
-                    key: value
-                    for key, value in attrs.items()
-                    if key not in result and key not in dropped_keys
-                }
-            )
-            result = {
-                key: value
-                for key, value in result.items()
-                if key not in attrs or equivalent(attrs[key], value)
-            }
-            dropped_keys |= {key for key in attrs if key not in result}
+            for key, value in attrs.items():
+                if key in dropped_keys:
+                    continue
+
+                if key not in result:
+                    result[key] = value
+                elif not equivalent_attrs(result[key], value):
+                    del result[key]
+                    dropped_keys.add(key)
+
         return result
     elif combine_attrs == "identical":
         result = dict(variable_attrs[0])
diff --git a/xarray/tests/test_merge.py b/xarray/tests/test_merge.py
index 8ae05fbb261..01bab8ca8b2 100644
--- a/xarray/tests/test_merge.py
+++ b/xarray/tests/test_merge.py
@@ -1,6 +1,9 @@
 from __future__ import annotations
 
+import warnings
+
 import numpy as np
+import pandas as pd
 import pytest
 
 import xarray as xr
@@ -235,6 +238,267 @@ def test_merge_attrs_drop_conflicts(self):
         expected = xr.Dataset(attrs={"a": 0, "d": 0, "e": 0})
         assert_identical(actual, expected)
 
+    def test_merge_attrs_drop_conflicts_numpy_arrays(self):
+        """Test that drop_conflicts compares ndarray attrs by value."""
+        # Plain numpy arrays, for which ``==`` returns an array rather than a bool
+        arr1 = np.array([1, 2, 3])
+        arr2 = np.array([1, 2, 3])
+        arr3 = np.array([4, 5, 6])
+
+        ds1 = xr.Dataset(attrs={"arr": arr1, "scalar": 1})
+        ds2 = xr.Dataset(attrs={"arr": arr2, "scalar": 1})  # Same array values
+        ds3 = xr.Dataset(attrs={"arr": arr3, "other": 2})  # Different array values
+
+        # Arrays are considered equivalent if they have the same values
+        actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
+        assert "arr" in actual.attrs  # Should keep the array since they're equivalent
+        assert actual.attrs["scalar"] == 1
+
+        # Different arrays cause the attribute to be dropped
+        actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts")
+        assert "arr" not in actual.attrs  # Should drop due to conflict
+        assert "other" in actual.attrs
+
+    def test_merge_attrs_drop_conflicts_custom_eq_returns_array(self):
+        """Test drop_conflicts with custom objects that return arrays from __eq__."""
+
+        # Helper type whose __eq__ yields an ndarray instead of a bool
+        class CustomEq:
+            """Object whose __eq__ returns a non-bool value."""
+
+            def __init__(self, value):
+                self.value = value
+
+            def __eq__(self, other):
+                if not isinstance(other, CustomEq):
+                    return False
+                # Return a one-element numpy array wrapping the comparison result
+                return np.array([self.value == other.value])
+
+            def __repr__(self):
+                return f"CustomEq({self.value})"
+
+        obj1 = CustomEq(42)
+        obj2 = CustomEq(42)  # Same value
+        obj3 = CustomEq(99)  # Different value
+
+        ds4 = xr.Dataset(attrs={"custom": obj1, "x": 1})
+        ds5 = xr.Dataset(attrs={"custom": obj2, "x": 1})
+        ds6 = xr.Dataset(attrs={"custom": obj3, "y": 2})
+
+        # Defensive: ignore any DeprecationWarning numpy might emit if the array
+        # returned by __eq__ were ever evaluated in a boolean context
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+
+            # Objects returning arrays are dropped (non-boolean return)
+            actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
+            assert "custom" not in actual.attrs  # Dropped - returns array, not bool
+            assert actual.attrs["x"] == 1
+
+            # Different values also dropped (returns array, not bool)
+            actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
+            assert "custom" not in actual.attrs  # Dropped - returns non-boolean
+            assert actual.attrs["x"] == 1
+            assert actual.attrs["y"] == 2
+
+    def test_merge_attrs_drop_conflicts_ambiguous_array_returns(self):
+        """Test drop_conflicts with objects returning ambiguous arrays from __eq__."""
+
+        # Edge case: __eq__ returns an empty array (ambiguous truth value)
+        class EmptyArrayEq:
+            def __eq__(self, other):
+                if not isinstance(other, EmptyArrayEq):
+                    return False
+                return np.array([])  # Empty array has ambiguous truth value
+
+            def __repr__(self):
+                return "EmptyArrayEq()"
+
+        empty_obj1 = EmptyArrayEq()
+        empty_obj2 = EmptyArrayEq()
+
+        ds7 = xr.Dataset(attrs={"empty": empty_obj1})
+        ds8 = xr.Dataset(attrs={"empty": empty_obj2})
+
+        # The non-boolean comparison result is treated as non-equivalent,
+        # so the attribute is dropped instead of raising an error
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
+            assert "empty" not in actual.attrs  # Dropped due to ambiguous comparison
+
+        # Test with object that returns multi-element array (also ambiguous)
+        class MultiArrayEq:
+            def __eq__(self, other):
+                if not isinstance(other, MultiArrayEq):
+                    return False
+                return np.array([True, False])  # Multi-element array is ambiguous
+
+            def __repr__(self):
+                return "MultiArrayEq()"
+
+        multi_obj1 = MultiArrayEq()
+        multi_obj2 = MultiArrayEq()
+
+        ds9 = xr.Dataset(attrs={"multi": multi_obj1})
+        ds10 = xr.Dataset(attrs={"multi": multi_obj2})
+
+        # Multi-element array results are likewise treated as non-equivalent
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            actual = xr.merge([ds9, ds10], combine_attrs="drop_conflicts")
+            assert "multi" not in actual.attrs  # Dropped due to ambiguous comparison
+
+    def test_merge_attrs_drop_conflicts_all_true_array(self):
+        """Test drop_conflicts with all-True multi-element array from __eq__."""
+
+        # __eq__ returns a multi-element array in which every element is True
+        class AllTrueArrayEq:
+            def __eq__(self, other):
+                if not isinstance(other, AllTrueArrayEq):
+                    return False
+                return np.array([True, True, True])  # All True, but still multi-element
+
+            def __repr__(self):
+                return "AllTrueArrayEq()"
+
+        alltrue1 = AllTrueArrayEq()
+        alltrue2 = AllTrueArrayEq()
+
+        ds11 = xr.Dataset(attrs={"alltrue": alltrue1})
+        ds12 = xr.Dataset(attrs={"alltrue": alltrue2})
+
+        # Multi-element arrays are ambiguous even if all True
+        actual = xr.merge([ds11, ds12], combine_attrs="drop_conflicts")
+        assert "alltrue" not in actual.attrs  # Dropped due to ambiguous comparison
+
+    def test_merge_attrs_drop_conflicts_nested_arrays(self):
+        """Test drop_conflicts with NumPy object arrays containing nested arrays."""
+        # One-element object arrays, each holding a nested integer array
+        # These can have complex comparison behavior
+        x = np.array([None], dtype=object)
+        x[0] = np.arange(3)
+        y = np.array([None], dtype=object)
+        y[0] = np.arange(10, 13)
+
+        ds1 = xr.Dataset(attrs={"nested_array": x, "common": 1})
+        ds2 = xr.Dataset(attrs={"nested_array": y, "common": 1})
+
+        # Different nested arrays should cause attribute to be dropped
+        actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
+        assert (
+            "nested_array" not in actual.attrs
+        )  # Dropped due to different nested arrays
+        assert actual.attrs["common"] == 1
+
+        # Test with identical nested arrays
+        # Note: even identical nested arrays are expected to be dropped, presumably
+        # because the comparison raises ValueError (ambiguous truth value)
+        z = np.array([None], dtype=object)
+        z[0] = np.arange(3)  # Same as x
+        ds3 = xr.Dataset(attrs={"nested_array": z, "other": 2})
+
+        actual = xr.merge([ds1, ds3], combine_attrs="drop_conflicts")
+        assert (
+            "nested_array" not in actual.attrs
+        )  # Dropped due to ValueError in comparison
+        assert actual.attrs["other"] == 2
+
+    def test_merge_attrs_drop_conflicts_dataset_attrs(self):
+        """Test drop_conflicts with xarray.Dataset objects as attributes."""
+        # xarray.Dataset objects as attributes (``==`` yields a Dataset, not a bool)
+        attr_ds1 = xr.Dataset({"foo": 1})
+        attr_ds2 = xr.Dataset({"bar": 1})  # Different dataset
+        attr_ds3 = xr.Dataset({"foo": 1})  # Same as attr_ds1
+
+        ds4 = xr.Dataset(attrs={"dataset_attr": attr_ds1, "scalar": 42})
+        ds5 = xr.Dataset(attrs={"dataset_attr": attr_ds2, "scalar": 42})
+        ds6 = xr.Dataset(attrs={"dataset_attr": attr_ds3, "other": 99})
+
+        # Different datasets are treated as conflicting and should be dropped
+        actual = xr.merge([ds4, ds5], combine_attrs="drop_conflicts")
+        assert "dataset_attr" not in actual.attrs  # Dropped as conflicting
+        assert actual.attrs["scalar"] == 42
+
+        # Identical datasets are also dropped (comparison returns Dataset, not bool)
+        actual = xr.merge([ds4, ds6], combine_attrs="drop_conflicts")
+        assert "dataset_attr" not in actual.attrs  # Dropped - returns Dataset, not bool
+        assert actual.attrs["other"] == 99
+
+    def test_merge_attrs_drop_conflicts_pandas_series(self):
+        """Test drop_conflicts with Pandas Series as attributes."""
+        # Pandas Series (``==`` returns a Series, whose truth value is ambiguous)
+        series1 = pd.Series([1, 2])
+        series2 = pd.Series([3, 4])  # Different values
+        series3 = pd.Series([1, 2])  # Same as series1
+
+        ds7 = xr.Dataset(attrs={"series": series1, "value": "a"})
+        ds8 = xr.Dataset(attrs={"series": series2, "value": "a"})
+        ds9 = xr.Dataset(attrs={"series": series3, "value": "a"})
+
+        # Suppress potential warnings from pandas comparisons
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore", category=DeprecationWarning)
+            warnings.filterwarnings("ignore", category=FutureWarning)
+
+            # Different series are dropped: the comparison result is not a bool
+            actual = xr.merge([ds7, ds8], combine_attrs="drop_conflicts")
+            assert "series" not in actual.attrs  # Dropped - returns Series, not bool
+            assert actual.attrs["value"] == "a"
+
+            # Even identical series get dropped, because Series comparison returns
+            # another Series rather than a bool and is treated as non-equivalent
+            actual = xr.merge([ds7, ds9], combine_attrs="drop_conflicts")
+            assert "series" not in actual.attrs  # Dropped - returns Series, not bool
+            assert actual.attrs["value"] == "a"
+
+    def test_merge_attrs_drop_conflicts_eq_returns_string(self):
+        """Test objects whose __eq__ returns strings are dropped."""
+
+        # __eq__ returns a truthy string, which must not be taken for equality
+        class ReturnsString:
+            def __init__(self, value):
+                self.value = value
+
+            def __eq__(self, other):
+                # Always returns a string (non-boolean)
+                return "comparison result"
+
+        obj1 = ReturnsString("A")
+        obj2 = ReturnsString("B")  # Different object
+
+        ds1 = xr.Dataset(attrs={"obj": obj1})
+        ds2 = xr.Dataset(attrs={"obj": obj2})
+
+        actual = xr.merge([ds1, ds2], combine_attrs="drop_conflicts")
+
+        # Strict behavior: drops attribute because __eq__ returns non-boolean
+        assert "obj" not in actual.attrs
+
+    def test_merge_attrs_drop_conflicts_eq_returns_number(self):
+        """Test objects whose __eq__ returns numbers are dropped."""
+
+        # __eq__ returns the integer 0 instead of a bool
+        class ReturnsZero:
+            def __init__(self, value):
+                self.value = value
+
+            def __eq__(self, other):
+                # Always returns 0 (non-boolean)
+                return 0
+
+        obj3 = ReturnsZero("same")
+        obj4 = ReturnsZero("same")  # Different object, same value
+
+        ds3 = xr.Dataset(attrs={"zero": obj3})
+        ds4 = xr.Dataset(attrs={"zero": obj4})
+
+        actual = xr.merge([ds3, ds4], combine_attrs="drop_conflicts")
+
+        # Strict behavior: drops attribute because __eq__ returns non-boolean
+        assert "zero" not in actual.attrs
+
     def test_merge_attrs_no_conflicts_compat_minimal(self):
         """make sure compat="minimal" does not silence errors"""
         ds1 = xr.Dataset({"a": ("x", [], {"a": 0})})