diff --git a/docs/src/whatsnew/latest.rst b/docs/src/whatsnew/latest.rst index a69de60f95..7e5c50a60e 100644 --- a/docs/src/whatsnew/latest.rst +++ b/docs/src/whatsnew/latest.rst @@ -37,6 +37,10 @@ This document explains the changes made to Iris for this release (:issue:`6248`, :pull:`6257`) +#. `@fnattino`_ added the lazy median aggregator :class:`iris.analysis.MEDIAN` + based on the implementation discussed by `@rcomer`_ and `@stefsmeets`_ in + :issue:`4039` (:pull:`6167`). + 🐛 Bugs Fixed ============= @@ -98,8 +102,9 @@ This document explains the changes made to Iris for this release Whatsnew author names (@github name) in alphabetical order. Note that, core dev names are automatically included by the common_links.inc: - - +.. _@fnattino: https://github.com/fnattino +.. _@jrackham-mo: https://github.com/jrackham-mo +.. _@stefsmeets: https://github.com/stefsmeets .. comment Whatsnew resources in alphabetical order: \ No newline at end of file diff --git a/lib/iris/analysis/__init__.py b/lib/iris/analysis/__init__.py index 2c890ef8cc..708f141de3 100644 --- a/lib/iris/analysis/__init__.py +++ b/lib/iris/analysis/__init__.py @@ -1612,6 +1612,19 @@ def _lazy_max_run(array, axis=-1, **kwargs): return result +def _lazy_median(data, axis=None, **kwargs): + """Calculate the lazy median, with support for masked arrays.""" + # Dask median requires the axes to be explicitly listed. + axis = range(data.ndim) if axis is None else axis + + if np.issubdtype(data, np.integer): + data = data.astype(float) + filled = da.ma.filled(data, np.nan) + result = da.nanmedian(filled, axis=axis, **kwargs) + result_masked = da.ma.fix_invalid(result) + return result_masked + + def _rms(array, axis, **kwargs): rval = np.sqrt(ma.average(array**2, axis=axis, **kwargs)) @@ -1940,7 +1953,9 @@ def interp_order(length): """ -MEDIAN = Aggregator("median", ma.median) +MEDIAN = Aggregator( + "median", ma.median, lazy_func=_build_dask_mdtol_function(_lazy_median) +) """ An :class:`~iris.analysis.Aggregator` instance that calculates the median over a :class:`~iris.cube.Cube`, as computed by @@ -1953,8 +1968,7 @@ def interp_order(length): result = cube.collapsed('longitude', iris.analysis.MEDIAN) -This aggregator handles masked data, but NOT lazy data. For lazy aggregation, -please try :obj:`~.PERCENTILE`. +This aggregator handles masked data and lazy data. """ diff --git a/lib/iris/tests/unit/analysis/test_MEDIAN.py b/lib/iris/tests/unit/analysis/test_MEDIAN.py new file mode 100644 index 0000000000..20b781b48b --- /dev/null +++ b/lib/iris/tests/unit/analysis/test_MEDIAN.py @@ -0,0 +1,90 @@ +# Copyright Iris contributors +# +# This file is part of Iris and is released under the BSD license. +# See LICENSE in the root of the repository for full licensing details. +"""Unit tests for the :data:`iris.analysis.MEDIAN` aggregator.""" + +import numpy as np +import numpy.ma as ma + +from iris._lazy_data import ( + as_concrete_data, + as_lazy_data, + is_lazy_data, + is_lazy_masked_data, +) +from iris.analysis import MEDIAN +from iris.tests._shared_utils import assert_array_almost_equal, assert_array_equal + + +def _get_data(lazy=False, masked=False): + data = np.arange(16).reshape((4, 4)) + if masked: + mask = np.eye(4) + data = ma.masked_array(data=data, mask=mask) + if lazy: + data = as_lazy_data(data) + return data + + +class Test_basics: + def setup_method(self): + self.data = _get_data() + + def test_name(self): + assert MEDIAN.name() == "median" + + def test_collapse(self): + data = MEDIAN.aggregate(self.data, axis=(0, 1)) + assert_array_equal(data, [7.5]) + + +class Test_masked: + def setup_method(self): + self.data = _get_data(masked=True) + + def test_output_is_masked(self): + result = MEDIAN.aggregate(self.data, axis=1) + assert ma.isMaskedArray(result) + + def test_median_is_mask_aware(self): + # the median computed along one axis differs if the array is masked + axis = 1 + result = MEDIAN.aggregate(self.data, axis=axis) + data_no_mask = _get_data() + result_no_mask = MEDIAN.aggregate(data_no_mask, axis=axis) + assert not np.allclose(result, result_no_mask) + + +class Test_lazy: + def setup_method(self): + self.data = _get_data(lazy=True) + + def test_output_is_lazy(self): + result = MEDIAN.lazy_aggregate(self.data, axis=(0, 1)) + assert is_lazy_data(result) + + def test_shape(self): + result = MEDIAN.lazy_aggregate(self.data, axis=1) + assert result.shape == (4,) + + def test_result_values(self): + axis = 1 + result = MEDIAN.lazy_aggregate(self.data, axis=axis) + expected = np.median(as_concrete_data(self.data), axis=axis) + assert_array_almost_equal(result, expected) + + +class Test_lazy_masked: + def setup_method(self): + self.data = _get_data(lazy=True, masked=True) + + def test_output_is_lazy_and_masked(self): + result = MEDIAN.lazy_aggregate(self.data, axis=1) + assert is_lazy_masked_data(result) + + def test_result_values(self): + axis = 1 + result = MEDIAN.lazy_aggregate(self.data, axis=axis) + expected = ma.median(as_concrete_data(self.data), axis=axis) + assert_array_almost_equal(result, expected)