Skip to content

Commit

Permalink
Merge pull request #17 from bioio-devs/bugfix/read-zarr-from-s3
Browse files Browse the repository at this point in the history
(Not a fork) Bugfix: read zarr from s3
  • Loading branch information
pgarrison authored May 2, 2024
2 parents 6e29c3d + d6908af commit 3b434d0
Show file tree
Hide file tree
Showing 3 changed files with 61 additions and 19 deletions.
9 changes: 9 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,15 @@ img = BioImage("my_file.zarr", reader=bioio_ome_zarr.Reader)
img.data
```

### Reading from AWS S3
To read from private S3 buckets, or from public buckets via `s3://` paths, [credentials](https://docs.aws.amazon.com/cli/latest/userguide/cli-configure-files.html) must be configured. Public buckets can be read without credentials by using their `https://` URL instead.
```python
from bioio import BioImage
path = "https://allencell.s3.amazonaws.com/aics/nuc_morph_data/data_for_analysis/baseline_colonies/20200323_09_small/raw.ome.zarr"
image = BioImage(path)
print(image.get_image_dask_data())
```

## Issues
[_Click here to view all open issues in bioio-devs organization at once_](https://github.com/search?q=user%3Abioio-devs+is%3Aissue+is%3Aopen&type=issues&ref=advsearch) or check this repository's issue tab.

Expand Down
34 changes: 15 additions & 19 deletions bioio_ome_zarr/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,10 @@ class Reader(reader.Reader):
Parameters
----------
image: Any
Some type of object to read and follow the Reader specification.
image: types.PathLike
String or Path to the ZARR root
fs_kwargs: Dict[str, Any]
Any specific keyword arguments to pass down to the fsspec created filesystem.
Default: {}
Notes
-----
It is up to the implementer of the Reader to decide which types they would like to
accept (certain readers may not support buffers for example).
Ignored
"""

_xarray_dask_data: Optional["xr.DataArray"] = None
Expand Down Expand Up @@ -65,17 +58,19 @@ def __init__(
# Enforce valid image
if not self._is_supported_image(self._fs, self._path):
raise exceptions.UnsupportedFileFormatError(
self.__class__.__name__, self._path
self.__class__.__name__,
self._path,
"Could not find a .zgroup or .zarray file at the provided path.",
)

self._zarr = ZarrReader(parse_url(self._path, mode="r")).zarr
self._zarr = get_zarr_reader(self._fs, self._path).zarr
self._physical_pixel_sizes: Optional[types.PhysicalPixelSizes] = None
self._channel_names: Optional[List[str]] = None

@staticmethod
def _is_supported_image(fs: AbstractFileSystem, path: str, **kwargs: Any) -> bool:
try:
ZarrReader(parse_url(path, mode="r"))
get_zarr_reader(fs, path)
return True

except AttributeError:
Expand All @@ -89,12 +84,7 @@ def is_supported_image(
**kwargs: Any,
) -> bool:
if isinstance(image, (str, Path)):
try:
ZarrReader(parse_url(image, mode="r"))
return True

except AttributeError:
return False
return cls._is_supported_image(None, str(image), **kwargs)
else:
return reader.Reader.is_supported_image(
cls, image, fs_kwargs=fs_kwargs, **kwargs
Expand Down Expand Up @@ -269,3 +259,9 @@ def _get_coords(
coords[dimensions.DimensionNames.Channel] = channel_names

return coords


def get_zarr_reader(fs: Optional[AbstractFileSystem], path: str) -> ZarrReader:
    """
    Build a ``ZarrReader`` for the ZARR store located at ``path``.

    Parameters
    ----------
    fs: Optional[AbstractFileSystem]
        Filesystem used to qualify ``path``. When provided, its
        ``unstrip_protocol`` is applied to ``path`` before the path is parsed.
        When None, ``path`` is handed to ``parse_url`` unchanged.
    path: str
        Path or URL of the ZARR root.

    Returns
    -------
    reader: ZarrReader
        The reader constructed from ``parse_url(path, mode="r")``.
    """
    if fs is not None:
        # presumably the path arrives with its protocol stripped by the
        # filesystem; restore it so parse_url sees the full URL -- TODO confirm
        path = fs.unstrip_protocol(path)
    return ZarrReader(parse_url(path, mode="r"))
37 changes: 37 additions & 0 deletions bioio_ome_zarr/tests/test_s3_read.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import numpy as np

from bioio_ome_zarr import Reader


def test_ome_zarr_reader() -> None:
    """Open a public OME-Zarr over HTTPS and check the metadata the Reader reports."""
    # ARRANGE
    # Cannot use s3:// URL due to ome-zarr issue #369
    # "s3://allencell/aics/nuc_morph_data"
    uri = (
        "https://allencell.s3.amazonaws.com/aics/nuc_morph_data"
        "/data_for_analysis/baseline_colonies/20200323_09_small/raw.ome.zarr"
    )
    scene = "/"
    resolution_level = 0
    expected_shape = (570, 2, 42, 1248, 1824)
    # pixel sizes in (Z, Y, X) order
    expected_pixel_sizes = (0.53, 0.2708333333333333, 0.2708333333333333)

    # ACT
    image_container = Reader(uri, fs_kwargs={"anon": True})
    image_container.set_scene(scene)
    image_container.set_resolution_level(resolution_level)

    # ASSERT
    assert image_container.scenes == (scene,)
    assert image_container.current_scene == scene
    assert image_container.resolution_levels == (0, 1, 2, 3, 4)
    assert image_container.shape == expected_shape
    assert image_container.dtype == np.uint16
    assert image_container.dims.order == "TCZYX"
    assert image_container.dims.shape == expected_shape
    assert image_container.channel_names == ["low_EGFP", "low_Bright"]
    assert image_container.current_resolution_level == resolution_level
    assert image_container.physical_pixel_sizes == expected_pixel_sizes

0 comments on commit 3b434d0

Please sign in to comment.