Skip to content

Commit af100e4

Browse files
stephenworsley, pre-commit-ci[bot], pp-mo
authored
Improve memory load for map_complete_blocks (#6730)
* improve memory for map_complete_blocks * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix test failures * add test * add whatsnew * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * remove import * Update lib/iris/_lazy_data.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * Update lib/iris/_lazy_data.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * Update lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * address review comments --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Patrick Peglar <patrick.peglar@metoffice.gov.uk>
1 parent 038dbb4 commit af100e4

File tree

3 files changed

+58
-4
lines changed

3 files changed

+58
-4
lines changed

docs/src/whatsnew/latest.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,10 @@ This document explains the changes made to Iris for this release
9696
constraints are given. This was previously only implemented where one such
9797
constraint was given. (:issue:`6228`, :pull:`6754`)
9898

99+
#. `@stephenworsley`_ reduced the memory load for regridding and other operations
100+
using :func:`~iris._lazy_data.map_complete_blocks` when the output chunks would
101+
exceed the optimum chunksize set in dask. (:pull:`6730`)
102+
99103

100104
🔥 Deprecations
101105
===============

lib/iris/_lazy_data.py

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -626,6 +626,14 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs):
626626
--------
627627
:func:`dask.array.map_blocks` : The function used for the mapping.
628628
629+
Notes
630+
-----
631+
.. note::
632+
633+
If the output chunks would be larger than the maximum chunksize set
634+
in the dask config, the input is rechunked, where possible, to
635+
optimise the output chunksize.
636+
629637
"""
630638
data = None
631639
result = None
@@ -640,17 +648,40 @@ def map_complete_blocks(src, func, dims, out_sizes, dtype, *args, **kwargs):
640648
else:
641649
data = src.lazy_data()
642650

651+
shape = list(src.shape)
652+
643653
if result is None and data is not None:
644654
# Ensure dims are not chunked
645655
in_chunks = list(data.chunks)
646656
for dim in dims:
647-
in_chunks[dim] = src.shape[dim]
648-
data = data.rechunk(in_chunks)
657+
in_chunks[dim] = (src.shape[dim],)
649658

650659
# Determine output chunks
651-
out_chunks = list(data.chunks)
660+
out_chunks = in_chunks.copy()
652661
for dim, size in zip(dims, out_sizes):
653-
out_chunks[dim] = size
662+
out_chunks[dim] = (size,)
663+
shape[dim] = size
664+
665+
# Ensure the chunksize of the output is a reasonable size.
666+
max_outchunks = [max(chunk) for chunk in out_chunks]
667+
df = tuple(i in dims for i in range(len(shape)))
668+
dtype = np.dtype(dtype)
669+
opt_outchunks = _optimum_chunksize(
670+
max_outchunks, shape, dtype=dtype, dims_fixed=df
671+
)
672+
for i, (chunk, max_out, opt_out) in enumerate(
673+
zip(out_chunks, max_outchunks, opt_outchunks)
674+
):
675+
if opt_out < max_out:
676+
new_chunks = []
677+
for c in chunk:
678+
new_chunks.extend((c // opt_out) * [opt_out])
679+
if chunk_end := c % opt_out:
680+
new_chunks.append(chunk_end)
681+
in_chunks[i] = tuple(new_chunks)
682+
out_chunks[i] = tuple(new_chunks)
683+
684+
data = data.rechunk(in_chunks)
654685

655686
# Assume operation preserves mask.
656687
meta = da.utils.meta_from_array(data).astype(dtype)

lib/iris/tests/unit/lazy_data/test_map_complete_blocks.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
from unittest.mock import Mock, PropertyMock
88

99
import dask.array as da
10+
import dask.config
1011
import numpy as np
1112

1213
from iris._lazy_data import is_lazy_data, map_complete_blocks
@@ -134,3 +135,21 @@ def test_multidimensional_input(self):
134135
)
135136
assert is_lazy_data(result)
136137
assert_array_equal(result.compute(), array + 1)
138+
139+
def test_rechunking(self):
140+
# Choose a dask array with an irregularly chunked dimension to be rechunked.
141+
lazy_array = da.ones((5, 10, 9, 10), chunks=(2, 10, 5, 5))
142+
cube, _ = create_mock_cube(lazy_array)
143+
144+
# Reduce the optimum dask chunksize.
145+
with dask.config.set({"array.chunk-size": "32KiB"}):
146+
result = map_complete_blocks(
147+
cube, self.func, dims=(1, 3), out_sizes=(30, 40), dtype=lazy_array.dtype
148+
)
149+
assert is_lazy_data(result)
150+
expected_chunksize = (1, 30, 2, 40)
151+
assert result.chunksize == expected_chunksize
152+
# Note that one chunk is irregularly rechunked and the other isn't.
153+
assert result.chunks[0] == (1, 1, 1, 1, 1)
154+
# split from the original chunks of (5, 4)
155+
assert result.chunks[2] == (2, 2, 1, 2, 2)

0 commit comments

Comments
 (0)