Skip to content

Commit 8e011bd

Browse files
authored
Ome ngff
Add helper functions `open_ome_ds` and `prepare_ome_ds`. Adding support for `types` in the metadata to specify axis types such as "space", "time", and "channel".
1 parent 654a639 commit 8e011bd

15 files changed

+675
-164
lines changed

.pre-commit-config.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
repos:
2+
- repo: https://github.com/astral-sh/ruff-pre-commit
3+
rev: v0.3.7
4+
hooks:
5+
- id: ruff
6+
args: [--fix, --exit-non-zero-on-fix]

funlib/persistence/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from .arrays import Array, open_ds, prepare_ds # noqa
1+
from .arrays import Array, open_ds, prepare_ds, open_ome_ds, prepare_ome_ds # noqa
22

33
__version__ = "0.5.4"
44
__version_info__ = tuple(int(i) for i in __version__.split("."))

funlib/persistence/arrays/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
from .array import Array # noqa
22
from .datasets import prepare_ds, open_ds # noqa
3+
from .ome_datasets import prepare_ome_ds, open_ome_ds # noqa

funlib/persistence/arrays/array.py

Lines changed: 45 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import logging
2+
from collections.abc import Sequence
23
from functools import reduce
3-
from typing import Any, Optional, Sequence, Union
4+
from typing import Any, Optional, Union
45

56
import dask.array as da
67
import numpy as np
@@ -45,6 +46,17 @@ class Array(Freezable):
4546
4647
The units of each spatial dimension.
4748
49+
types (``Optional[Sequence[str]]``):
50+
51+
The type of each dimension. Can be "space", "channel", or "time".
52+
We treat both "space" and "time" as spatial dimensions for indexing with
53+
Roi and Coordinate classes in world units.
54+
If not provided, we fall back on the axis names and assume "channel"
55+
for any axis name that ends with "^" and "space" otherwise. If neither
56+
are provided we assume "space" for all dimensions.
57+
Note that the axis name parsing is deprecated and will be removed in
58+
future versions and we highly recommend providing the types directly.
59+
4860
chunks (`tuple[int]` or `str` or `int`, optional):
4961
5062
See https://docs.dask.org/en/stable/generated/dask.array.from_array.html for
@@ -56,6 +68,12 @@ class Array(Freezable):
5668
lazy_ops, please look into either the `.lazy_op` method or the
5769
`SequentialLazyOp` class.
5870
71+
strict_metadata (``bool``):
72+
73+
If True, then all metadata fields must be provided, including
74+
offset, voxel_size, axis_names, units, and types. Metadata
75+
can either be passed in or read from array attributes.
76+
5977
"""
6078

6179
data: da.Array
@@ -67,8 +85,10 @@ def __init__(
6785
voxel_size: Optional[Sequence[int]] = None,
6886
axis_names: Optional[Sequence[str]] = None,
6987
units: Optional[Sequence[str]] = None,
88+
types: Optional[Sequence[str]] = None,
7089
chunks: Optional[Union[int, Sequence[int], str]] = "auto",
7190
lazy_op: Optional[LazyOp] = None,
91+
strict_metadata: bool = False,
7292
):
7393
if not isinstance(data, da.Array):
7494
self.data = da.from_array(data, chunks=chunks)
@@ -81,7 +101,9 @@ def __init__(
81101
voxel_size=Coordinate(voxel_size) if voxel_size is not None else None,
82102
axis_names=list(axis_names) if axis_names is not None else None,
83103
units=list(units) if units is not None else None,
104+
types=list(types) if types is not None else None,
84105
shape=self._source_data.shape,
106+
strict=strict_metadata,
85107
)
86108

87109
# used for custom metadata unrelated to indexing with physical units
@@ -96,7 +118,7 @@ def __init__(
96118

97119
self.freeze()
98120

99-
self.validate()
121+
self.validate(strict_metadata)
100122

101123
@property
102124
def attrs(self) -> dict:
@@ -115,11 +137,17 @@ def chunk_shape(self) -> Coordinate:
115137
return Coordinate(self.data.chunksize)
116138

117139
def uncollapsed_dims(self, physical: bool = False) -> list[bool]:
140+
"""
141+
We support lazy slicing of arrays, such as `x = Array(np.ones((5, 5, 5)))` and
142+
`x.lazy_op(np.s_[0, :, :])`. This will result in the first dimension being
143+
collapsed and future slicing operations will need to be 2D.
144+
We use `_uncollapsed_dims` to keep track of which dimensions are sliceable.
145+
"""
118146
if physical:
119147
return [
120148
x
121-
for x, c in zip(self._uncollapsed_dims, self._metadata.axis_names)
122-
if not c.endswith("^")
149+
for x, t in zip(self._uncollapsed_dims, self._metadata.types)
150+
if t in ["space", "time"]
123151
]
124152
else:
125153
return self._uncollapsed_dims
@@ -165,14 +193,22 @@ def axis_names(self) -> list[str]:
165193
if uncollapsed
166194
]
167195

196+
@property
197+
def types(self) -> list[str]:
198+
return [
199+
self._metadata.types[ii]
200+
for ii, uncollapsed in enumerate(self.uncollapsed_dims(physical=False))
201+
if uncollapsed
202+
]
203+
168204
@property
169205
def physical_shape(self):
170206
return tuple(
171207
self._source_data.shape[ii]
172-
for ii, (uncollapsed, name) in enumerate(
173-
zip(self.uncollapsed_dims(physical=False), self._metadata.axis_names)
208+
for ii, (uncollapsed, type) in enumerate(
209+
zip(self.uncollapsed_dims(physical=False), self._metadata.types)
174210
)
175-
if uncollapsed and not name.endswith("^")
211+
if uncollapsed and type in ["space", "time"]
176212
)
177213

178214
@property
@@ -438,8 +474,8 @@ def __index(self, coordinate):
438474
index = (Ellipsis,) + index
439475
return index
440476

441-
def validate(self):
442-
self._metadata.validate()
477+
def validate(self, strict: bool = False):
478+
self._metadata.validate(strict)
443479
assert len(self.axis_names) == len(self._source_data.shape), (
444480
f"Axis names must be provided for every dimension. Got ({self.axis_names}) "
445481
f"but expected {len(self.shape)} to match the data shape: {self.shape}"

funlib/persistence/arrays/datasets.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import logging
2-
from typing import Any, Optional, Sequence, Union
2+
from collections.abc import Sequence
3+
from typing import Any, Optional, Union
34

45
import numpy as np
56
import zarr
@@ -29,7 +30,9 @@ def open_ds(
2930
voxel_size: Optional[Sequence[int]] = None,
3031
axis_names: Optional[Sequence[str]] = None,
3132
units: Optional[Sequence[str]] = None,
33+
types: Optional[Sequence[str]] = None,
3234
chunks: Optional[Union[int, Sequence[int], str]] = "strict",
35+
strict_metadata: bool = False,
3336
**kwargs,
3437
) -> Array:
3538
"""
@@ -68,6 +71,11 @@ def open_ds(
6871
6972
An override for the units of your dataset.
7073
74+
types (`str`, (optional)):
75+
76+
An override for the types of your axes. For more details see:
77+
https://ngff.openmicroscopy.org/latest/#axes-md
78+
7179
chunks (`Coordinate`, (optional)):
7280
7381
An override for the size of the chunks in the dataset.
@@ -77,6 +85,11 @@ def open_ds(
7785
See https://docs.dask.org/en/stable/generated/dask.array.from_array.html
7886
for more information.
7987
88+
strict_metadata (`bool`, (optional)):
89+
90+
If True, all metadata fields (offset, voxel_size, axis_names, units, types)
91+
must be provided either as arguments or read from dataset attributes.
92+
8093
kwargs:
8194
8295
See additional arguments available here:
@@ -106,6 +119,8 @@ def open_ds(
106119
voxel_size=voxel_size,
107120
axis_names=axis_names,
108121
units=units,
122+
types=types,
123+
strict=strict_metadata,
109124
)
110125

111126
return Array(
@@ -114,6 +129,7 @@ def open_ds(
114129
metadata.voxel_size,
115130
metadata.axis_names,
116131
metadata.units,
132+
metadata.types,
117133
data.chunks if chunks == "strict" else chunks,
118134
)
119135

@@ -125,6 +141,7 @@ def prepare_ds(
125141
voxel_size: Optional[Coordinate] = None,
126142
axis_names: Optional[Sequence[str]] = None,
127143
units: Optional[Sequence[str]] = None,
144+
types: Optional[Sequence[str]] = None,
128145
chunk_shape: Optional[Sequence[int]] = None,
129146
dtype: DTypeLike = np.float32,
130147
mode: str = "a",
@@ -156,8 +173,7 @@ def prepare_ds(
156173
157174
axis_names:
158175
159-
The axis names of the dataset to create. The names of non-physical
160-
dimensions should end with "^". e.g. ["samples^", "channels^", "z", "y", "x"]
176+
The axis names of the dataset to create.
161177
Set to ["c{i}^", "d{j}"] by default. Where i, j are the index of the non-physical
162178
and physical dimensions respectively.
163179
@@ -166,6 +182,17 @@ def prepare_ds(
166182
The units of the dataset to create. Only provide for physical dimensions.
167183
Set to all "" by default.
168184
185+
types:
186+
187+
The types of the axes of the dataset to create. For more details see:
188+
https://ngff.openmicroscopy.org/latest/#axes-md
189+
If not provided, we will first fall back on to axis_names if provided
190+
and use "channel" for axis names ending in "^", and "space" otherwise.
191+
If neither are provided, we will assume all dimensions are spatial.
192+
Note that axis name parsing is deprecated and will be removed in the
193+
future. Please provide types directly if you have a mix of spatial and
194+
non-spatial dimensions.
195+
169196
chunk_shape:
170197
171198
The shape of the chunks to use in the dataset. For all dimensions,
@@ -207,6 +234,7 @@ def prepare_ds(
207234
voxel_size=voxel_size,
208235
axis_names=axis_names,
209236
units=units,
237+
types=types,
210238
)
211239

212240
try:
@@ -256,6 +284,14 @@ def prepare_ds(
256284
)
257285
metadata_compatible = False
258286

287+
if given_metadata.types != existing_metadata.types:
288+
logger.info(
289+
"Types differ: given (%s) vs parsed (%s)",
290+
given_metadata.types,
291+
existing_metadata.types,
292+
)
293+
metadata_compatible = False
294+
259295
if given_metadata.axis_names != existing_metadata.axis_names:
260296
logger.info(
261297
"Axis names differ: given (%s) vs parsed (%s)",
@@ -298,6 +334,7 @@ def prepare_ds(
298334
existing_metadata.voxel_size,
299335
existing_metadata.axis_names,
300336
existing_metadata.units,
337+
existing_metadata.types,
301338
ds.chunks,
302339
)
303340

@@ -308,6 +345,7 @@ def prepare_ds(
308345
voxel_size=voxel_size,
309346
axis_names=axis_names,
310347
units=units,
348+
types=types,
311349
)
312350

313351
# create the dataset
@@ -330,6 +368,7 @@ def prepare_ds(
330368
default_metadata_format.units_attr: combined_metadata.units,
331369
default_metadata_format.voxel_size_attr: combined_metadata.voxel_size,
332370
default_metadata_format.offset_attr: combined_metadata.offset,
371+
default_metadata_format.types_attr: combined_metadata.types,
333372
}
334373
# check keys don't conflict
335374
if custom_metadata is not None:
@@ -339,6 +378,6 @@ def prepare_ds(
339378
ds.attrs.put(our_metadata)
340379

341380
# open array
342-
array = Array(ds, offset, voxel_size, axis_names, units)
381+
array = Array(ds, offset, voxel_size, axis_names, units, types)
343382

344383
return array

0 commit comments

Comments
 (0)