Skip to content

Lazy median aggregator #6167

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Jan 17, 2025
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ This document explains the changes made to Iris for this release
(:issue:`6248`, :pull:`6257`)


#. `@fnattino`_ added the lazy median aggregator :class:`iris.analysis.MEDIAN`
based on the implementation discussed by `@rcomer`_ and `@stefsmeets`_ in
:issue:`4039` (:pull:`6167`).


🐛 Bugs Fixed
=============
Expand Down Expand Up @@ -98,8 +102,9 @@ This document explains the changes made to Iris for this release
Whatsnew author names (@github name) in alphabetical order. Note that,
core dev names are automatically included by the common_links.inc:



.. _@fnattino: https://github.com/fnattino
.. _@jrackham-mo: https://github.com/jrackham-mo
.. _@stefsmeets: https://github.com/stefsmeets

.. comment
Whatsnew resources in alphabetical order:
20 changes: 17 additions & 3 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1612,6 +1612,19 @@ def _lazy_max_run(array, axis=-1, **kwargs):
return result


def _lazy_median(data, axis=None, **kwargs):
"""Calculate the lazy median, with support for masked arrays."""
# Dask median requires the axes to be explicitly listed.
axis = range(data.ndim) if axis is None else axis

if np.issubdtype(data, np.integer):
data = data.astype(float)
filled = da.ma.filled(data, np.nan)
result = da.nanmedian(filled, axis=axis, **kwargs)
result_masked = da.ma.fix_invalid(result)
return result_masked


def _rms(array, axis, **kwargs):
rval = np.sqrt(ma.average(array**2, axis=axis, **kwargs))

Expand Down Expand Up @@ -1940,7 +1953,9 @@ def interp_order(length):
"""


MEDIAN = Aggregator("median", ma.median)
MEDIAN = Aggregator(
"median", ma.median, lazy_func=_build_dask_mdtol_function(_lazy_median)
)
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the median over a :class:`~iris.cube.Cube`, as computed by
Expand All @@ -1953,8 +1968,7 @@ def interp_order(length):
result = cube.collapsed('longitude', iris.analysis.MEDIAN)


This aggregator handles masked data, but NOT lazy data. For lazy aggregation,
please try :obj:`~.PERCENTILE`.
This aggregator handles masked data and lazy data.

"""

Expand Down
97 changes: 97 additions & 0 deletions lib/iris/tests/unit/analysis/test_MEDIAN.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for the :data:`iris.analysis.MEDIAN` aggregator."""

# Import iris.tests first so that some things can be initialised before
# importing anything else.
import iris.tests as tests # isort:skip

import numpy as np
import numpy.ma as ma

from iris._lazy_data import (
as_concrete_data,
as_lazy_data,
is_lazy_data,
is_lazy_masked_data,
)
from iris.analysis import MEDIAN


def _get_data(lazy=False, masked=False):
data = np.arange(16).reshape((4, 4))
if masked:
mask = np.eye(4)
data = ma.masked_array(data=data, mask=mask)
if lazy:
data = as_lazy_data(data)
return data


class Test_basics(tests.IrisTest):
def setUp(self):
self.data = _get_data()

def test_name(self):
self.assertEqual(MEDIAN.name(), "median")

def test_collapse(self):
data = MEDIAN.aggregate(self.data, axis=(0, 1))
self.assertArrayEqual(data, [7.5])


class Test_masked(tests.IrisTest):
def setUp(self):
self.data = _get_data(masked=True)

def test_output_is_masked(self):
result = MEDIAN.aggregate(self.data, axis=1)
self.assertTrue(ma.isMaskedArray(result))

def test_median_is_mask_aware(self):
# the median computed along one axis differs if the array is masked
axis = 1
result = MEDIAN.aggregate(self.data, axis=axis)
data_no_mask = _get_data()
result_no_mask = MEDIAN.aggregate(data_no_mask, axis=axis)
self.assertFalse(np.allclose(result, result_no_mask))


class Test_lazy(tests.IrisTest):
def setUp(self):
self.data = _get_data(lazy=True)

def test_output_is_lazy(self):
result = MEDIAN.lazy_aggregate(self.data, axis=(0, 1))
self.assertTrue(is_lazy_data(result))

def test_shape(self):
result = MEDIAN.lazy_aggregate(self.data, axis=1)
self.assertTupleEqual(result.shape, (4,))

def test_result_values(self):
axis = 1
result = MEDIAN.lazy_aggregate(self.data, axis=axis)
expected = np.median(as_concrete_data(self.data), axis=axis)
self.assertArrayAlmostEqual(result, expected)


class Test_lazy_masked(tests.IrisTest):
def setUp(self):
self.data = _get_data(lazy=True, masked=True)

def test_output_is_lazy_and_masked(self):
result = MEDIAN.lazy_aggregate(self.data, axis=1)
self.assertTrue(is_lazy_masked_data(result))

def test_result_values(self):
axis = 1
result = MEDIAN.lazy_aggregate(self.data, axis=axis)
expected = ma.median(as_concrete_data(self.data), axis=axis)
self.assertArrayAlmostEqual(result, expected)


if __name__ == "__main__":
tests.main()
Loading