Skip to content

Commit 19e0ab3

Browse files
committed
Updated docstrings and comments for release
1 parent a9cbba1 commit 19e0ab3

File tree

5 files changed

+81
-73
lines changed

5 files changed

+81
-73
lines changed

XarrayActive/active_chunk.py

+43-14
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
class ActiveOptionsContainer:
66
"""
7-
Container for ActiveOptions properties.
7+
Container for ActiveOptions properties. Only for use within XarrayActive.
88
"""
99
@property
1010
def active_options(self):
@@ -39,19 +39,24 @@ def _set_active_options(self, chunks={}, chunk_limits=True):
3939
self._active_chunks = chunks
4040
self._chunk_limits = chunk_limits
4141

42-
# Holds all Active routines.
4342
class ActiveChunk:
43+
"""
44+
Container class for all Active-required methods to perform on each chunk.
45+
All active-per-chunk content should be found here.
46+
"""
4447

45-
description = "Container class for Active routines performed on each chunk. All active-per-chunk content can be found here."
48+
description = "Container class for Active routines performed on each chunk."
4649

4750
def _post_process_data(self, data):
48-
# Perform any post-processing steps on the data here
51+
"""
52+
Perform any post-processing steps on the data here.
53+
"""
4954
return data
5055

5156
def _standard_sum(self, axes=None, skipna=None, **kwargs):
5257
"""
53-
Standard Mean routine matches the normal routine for dask, required at this
54-
stage if Active mean not available.
58+
Standard sum routine matches the normal routine for dask, required at this
59+
stage if Active mean/sum not available.
5560
"""
5661

5762
arr = np.array(self)
@@ -62,12 +67,31 @@ def _standard_sum(self, axes=None, skipna=None, **kwargs):
6267
return total
6368

6469
def _standard_max(self, axes=None, skipna=None, **kwargs):
70+
"""
71+
Standard max routine if Active not available, warning will be given.
72+
Kwargs may be necessary to add here.
73+
"""
6574
return np.max(self, axis=axes)
6675

6776
def _standard_min(self, axes=None, skipna=None, **kwargs):
77+
"""
78+
Standard min routine if Active not available, warning will be given.
79+
Kwargs may be necessary to add here.
80+
"""
6881
return np.min(self, axis=axes)
6982

7083
def _numel(self, method, axes=None):
84+
"""
85+
Number of elements remaining after a reduction, to allow
86+
dask to combine reductions from all different chunks.
87+
Example:
88+
(2,3,4) chunk reduced along second dimension. Will
89+
give a (2,3) array where each value is 4 - for the
90+
length of the dimension along which a reduction
91+
took place.
92+
93+
"""
94+
# Applied reduction across all axes
7195
if not axes:
7296
return self.size
7397

@@ -98,20 +122,20 @@ def active_method(self, method, axis=None, skipna=None, **kwargs):
98122
'max' : self._standard_max,
99123
'min' : self._standard_min
100124
}
101-
ret = None
125+
partial = None
102126
n = self._numel(method, axes=axis)
103127

104128
try:
105129
from activestorage.active import Active
106130
except ImportError:
107131
# Unable to import Active package. Default to using normal mean.
108132
print("ActiveWarning: Unable to import active module - defaulting to standard method.")
109-
ret = {
133+
partial = {
110134
'n': n,
111135
'total': standard_methods[method](axes=axis, skipna=skipna, **kwargs)
112136
}
113137

114-
if not ret:
138+
if not partial:
115139

116140
# Create Active client
117141
active = Active(self.filename, self.address)
@@ -131,13 +155,14 @@ def active_method(self, method, axis=None, skipna=None, **kwargs):
131155
data = active[extent]
132156
t = self._post_process_data(data) * n
133157

134-
ret = {
158+
partial = {
135159
'n': n,
136160
'total': t
137161
}
138162

139-
if not ret:
163+
if not partial:
140164
# Experimental Recursive requesting to get each 1D column along the axes being requested.
165+
# - May be very bad performance due to many requests for (1,1,X) shapes
141166
range_recursives = []
142167
for dim in range(self.ndim):
143168
if dim not in axis:
@@ -147,17 +172,21 @@ def active_method(self, method, axis=None, skipna=None, **kwargs):
147172
results = np.array(self._get_elements(active, range_recursives, hyperslab=[]))
148173

149174
t = self._post_process_data(results) * n
150-
ret = {
175+
partial = {
151176
'n': n,
152177
'total': t
153178
}
154179

155180
if method == 'mean':
156-
return ret
181+
return partial
157182
else:
158-
return ret['total']/ret['n']
183+
return partial['total']/partial['n']
159184

160185
def _get_elements(self, active, recursives, hyperslab=[]):
186+
"""
187+
Recursive function to fetch and arrange the appropriate column slices
188+
from Active.
189+
"""
161190
dimarray = []
162191
if not len(recursives) > 0:
163192

XarrayActive/active_dask.py

+7
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66

77
from .active_chunk import ActiveChunk
88

9+
## Partition Methods are the first step in the Dask Reductions.
10+
911
def partition_mean(arr, *args, **kwargs):
1012
return partition_method(arr, 'mean', *args, **kwargs)
1113

@@ -29,6 +31,10 @@ def partition_method(arr, method, *args, **kwargs):
2931
# Computing meta - dask operation not fully utilised.
3032
return None
3133

34+
## Combining results from Partition methods
35+
# - Dask built-in mean-agg and mean-combine for mean.
36+
# - Min/Max/Sum require simple functions for combine/aggregation.
37+
3238
def general_combine(pairs, axis=None):
3339
if not isinstance(pairs, list):
3440
pairs = [pairs]
@@ -49,6 +55,7 @@ class DaskActiveArray(da.Array):
4955

5056
@property
5157
def is_active(self):
58+
# Quick way of distinguishing from Dask Array
5259
return True
5360

5461
def copy(self):

XarrayActive/active_xarray.py

+15-53
Original file line numberDiff line numberDiff line change
@@ -14,53 +14,18 @@ class ActiveDataArray(DataArray):
1414
# No additional properties
1515
__slots__ = ()
1616

17-
def mean(
18-
self,
19-
*args,
20-
**kwargs,
21-
):
22-
23-
return self._active_op(
24-
dataarray_active_mean,
25-
*args,
26-
**kwargs,
27-
)
17+
# Override Xarray DataArray standard functions in favour of Active enabled ones.
18+
def mean(self, *args,**kwargs):
19+
return self._active_op(dataarray_active_mean, *args, **kwargs)
2820

29-
def max(
30-
self,
31-
*args,
32-
**kwargs,
33-
):
34-
35-
return self._active_op(
36-
dataarray_active_max,
37-
*args,
38-
**kwargs,
39-
)
21+
def max(self, *args,**kwargs):
22+
return self._active_op(dataarray_active_max, *args, **kwargs)
4023

41-
def min(
42-
self,
43-
*args,
44-
**kwargs,
45-
):
46-
47-
return self._active_op(
48-
dataarray_active_min,
49-
*args,
50-
**kwargs,
51-
)
24+
def min(self, *args,**kwargs):
25+
return self._active_op(dataarray_active_min, *args, **kwargs)
5226

53-
def sum(
54-
self,
55-
*args,
56-
**kwargs,
57-
):
58-
59-
return self._active_op(
60-
dataarray_active_sum,
61-
*args,
62-
**kwargs,
63-
)
27+
def sum(self, *args,**kwargs):
28+
return self._active_op(dataarray_active_sum, *args, **kwargs)
6429

6530
def _active_op(
6631
self,
@@ -72,7 +37,7 @@ def _active_op(
7237
**kwargs,
7338
):
7439
"""
75-
Reduce this DataArray's data by applying an operation along some dimension(s).
40+
Reduce this DataArray's data by applying an ``active`` operation along some dimension(s).
7641
7742
Parameters
7843
----------
@@ -96,21 +61,16 @@ def _active_op(
9661
Returns
9762
-------
9863
reduced : DataArray
99-
New DataArray with ``max`` applied to its data and the
64+
New DataArray with reduction applied to its data and the
10065
indicated dimension(s) removed
10166
102-
See Also
103-
--------
104-
numpy.max
105-
dask.array.max
10667
"""
10768
return self.reduce(
10869
op,
10970
dim=dim,
11071
skipna=skipna,
11172
keep_attrs=keep_attrs,
112-
**kwargs,
113-
)
73+
**kwargs)
11474

11575
class ActiveDataset(Dataset):
11676

@@ -150,6 +110,7 @@ def _construct_dataarray(self, name):
150110
fastpath=True
151111
)
152112

113+
## DataArray methods to apply to the DaskActiveArray
153114
def dataarray_active_mean(array, *args, **kwargs):
154115
return dataarray_active_method(array, 'mean', *args, **kwargs)
155116

@@ -164,7 +125,7 @@ def dataarray_active_sum(array, *args, **kwargs):
164125

165126
def dataarray_active_method(array: DaskActiveArray, method: str, axis=None, skipna=None, **kwargs):
166127
"""
167-
Function provided to dask reduction, activates the ``active_mean`` method of the ``DaskActiveArray``.
128+
Function provided to dask reduction, activates the ``active`` methods of the ``DaskActiveArray``.
168129
169130
:param array: (obj) A DaskActiveArray object which has additional methods enabling Active operations.
170131
@@ -183,6 +144,7 @@ def dataarray_active_method(array: DaskActiveArray, method: str, axis=None, skip
183144
'sum': array.active_sum
184145
}
185146

147+
# On failure of the Active method, can use Duck methods instead - normal behaviour.
186148
duck_methods = {
187149
'mean': duck_array_ops.mean,
188150
'max': duck_array_ops.max,

XarrayActive/datastore.py

+11
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,28 @@ class ActiveDataStore(NetCDF4DataStore, ActiveOptionsContainer):
1919

2020
def get_variables(self):
2121
"""
22+
Override normal store behaviour to allow opening some variables 'actively'
2223
"""
2324
return FrozenDict(
2425
(k, self.open_variable(k, v)) for k, v in self.ds.variables.items()
2526
)
2627

2728
def open_variable(self, name: str, var):
29+
"""
30+
Allow opening some variables 'actively', if they are not a dimension (where
31+
you'll want the whole array anyway) and where the active chunks are specified
32+
- required by XarrayActive.
33+
"""
2834
if name in self.ds.dimensions or not self._active_chunks:
2935
return self.open_store_variable(name, var)
3036
else:
3137
return self.open_active_variable(name, var)
3238

3339
def open_active_variable(self, name: str, var):
40+
"""
41+
Utilise the ActiveArrayWrapper builder to obtain the data
42+
Lazily for this variable so active methods can be applied later.
43+
"""
3444
import netCDF4
3545

3646
dimensions = var.dimensions
@@ -52,6 +62,7 @@ def open_active_variable(self, name: str, var):
5262
)
5363
)
5464

65+
# Everything after this point is normal store behaviour
5566
encoding = {}
5667

5768
if isinstance(var.datatype, netCDF4.EnumType):

XarrayActive/wrappers.py

+5-6
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,9 @@
66
get_chunk_positions,
77
get_chunk_extent,
88
get_dask_chunks,
9-
combine_slices,
10-
normalize_partition_chunks
9+
combine_slices
1110
)
1211
from .active_chunk import (
13-
ActiveChunk,
1412
ActiveOptionsContainer
1513
)
1614

@@ -23,8 +21,8 @@
2321

2422
class ActivePartition(ArrayPartition):
2523
"""
26-
Combines ActiveChunk - active methods, and ArrayPartition - array methods
27-
into a single ChunkWrapper class.
24+
Container for future ActivePartition behaviour, may not be required unless
25+
additional behaviour is required.
2826
"""
2927
def copy(self, extent=None):
3028

@@ -66,6 +64,7 @@ def __init__(
6664
super().__init__(shape, units=units, dtype=dtype)
6765

6866
# Further work required to get this to work - 23/08/24
67+
6968
#self._active_chunks = normalize_partition_chunks(
7069
# self._active_chunks,
7170
# self.shape,
@@ -102,7 +101,7 @@ def __array__(self, *args, **kwargs):
102101
return self._variable
103102
else:
104103

105-
# for every dask chunk return a smaller object with the right extent.
104+
# For every dask chunk return a smaller object with the right extent.
106105
# Create a chunk_shape tuple from chunks and _variable (figure out which chunk and which axis, divide etc.)
107106
# Define a subarray for each chunk, with appropriate index.
108107

0 commit comments

Comments
 (0)