diff --git a/CHANGELOG.md b/CHANGELOG.md index 7dd0757e..2f9637e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,12 @@ -## [1.0.11] - 2024-xx-xx +## [1.x.x] - xxxx-xx-xx + +## [1.0.11] - 2024-04-26 ### Added - Can overlay a custom segmentation (merge boundaries) - Xenium Explorer selection(s) can be added as shapes in a SpatialData object +- Optional OpenSlide backend for WSI data +- New `sopa.io.aicsimageio` reader for special formats (#58) ### Changed - Rename `Aggregator.update_table` to `Aggregator.compute_table` diff --git a/docs/api/_sdata.md b/docs/api/_sdata.md index 8683d4ee..5b311c56 100644 --- a/docs/api/_sdata.md +++ b/docs/api/_sdata.md @@ -2,25 +2,13 @@ These are convenient tools that operates on `SpatialData` objects ::: sopa._sdata.get_boundaries - options: - show_root_heading: true ::: sopa._sdata.get_intrinsic_cs - options: - show_root_heading: true ::: sopa._sdata.to_intrinsic - options: - show_root_heading: true ::: sopa._sdata.get_intensities - options: - show_root_heading: true ::: sopa._sdata.iter_scales - options: - show_root_heading: true ::: sopa._sdata.get_spatial_image - options: - show_root_heading: true diff --git a/docs/api/annotation.md b/docs/api/annotation.md new file mode 100644 index 00000000..856f235f --- /dev/null +++ b/docs/api/annotation.md @@ -0,0 +1,5 @@ +::: sopa.annotation.tangram_annotate + +::: sopa.annotation.higher_z_score + +::: sopa.annotation.preprocess_fluo diff --git a/docs/api/annotation/fluorescence.md b/docs/api/annotation/fluorescence.md deleted file mode 100644 index 882af767..00000000 --- a/docs/api/annotation/fluorescence.md +++ /dev/null @@ -1,7 +0,0 @@ -::: sopa.annotation.higher_z_score - options: - show_root_heading: true - -::: sopa.annotation.preprocess_fluo - options: - show_root_heading: true diff --git a/docs/api/annotation/tangram.md b/docs/api/annotation/tangram.md deleted file mode 100644 index 2a3cfb23..00000000 --- a/docs/api/annotation/tangram.md +++ /dev/null @@ -1,3 +0,0 @@ -::: sopa.annotation.tangram.tangram_annotate - options: - show_root_heading: true diff --git a/docs/api/embedding.md b/docs/api/embedding.md index ab496d60..023255ad 100644 --- a/docs/api/embedding.md +++ b/docs/api/embedding.md @@ -1,3 +1 @@ ::: sopa.embedding.embed_wsi_patches - options: - show_root_heading: true diff --git a/docs/api/io.explorer.md b/docs/api/io.explorer.md deleted file mode 100644 index 26364a3a..00000000 --- a/docs/api/io.explorer.md +++ /dev/null @@ -1,47 +0,0 @@ -::: sopa.io.explorer.write - options: - show_root_heading: true - -::: sopa.io.explorer.write_image - options: - show_root_heading: true - -::: sopa.io.explorer.write_cell_categories - options: - show_root_heading: true - -::: sopa.io.explorer.write_transcripts - options: - show_root_heading: true - -::: sopa.io.explorer.write_gene_counts - options: - show_root_heading: true - -::: sopa.io.explorer.write_polygons - options: - show_root_heading: true - -::: sopa.io.explorer.write_metadata - options: - show_root_heading: true - -::: sopa.io.explorer.int_cell_id - options: - show_root_heading: true - -::: sopa.io.explorer.str_cell_id - options: - show_root_heading: true - -::: sopa.io.explorer.align - options: - show_root_heading: true - -::: sopa.io.explorer.save_column_csv - options: - show_root_heading: true - -::: sopa.io.explorer.add_explorer_selection - options: - show_root_heading: true diff --git a/docs/api/io.md b/docs/api/io.md index 8beb0005..45bbab0b 100644 --- a/docs/api/io.md +++ b/docs/api/io.md @@ -1,45 +1,51 @@ +# sopa.io + !!! 
notes Due to many updates in the data format provided by the different companies, you might have issues loading your data. In this case, consider [opening an issue](https://github.com/gustaveroussy/sopa/issues) detailing the version of the machine you used and the error log, as well as an example of file names that you are trying to read. !!! notes "Related to `spatialdata-io`" A library called [`spatialdata-io`](https://spatialdata.scverse.org/projects/io/en/latest/) already contains a lot of readers. Here, we updated some readers already existing in `spatialdata-io`, and added a few others. In the future, we will completely rely on `spatialdata-io`. +## Readers + ::: sopa.io.xenium - options: - show_root_heading: true ::: sopa.io.merscope - options: - show_root_heading: true ::: sopa.io.cosmx - options: - show_root_heading: true ::: sopa.io.macsima - options: - show_root_heading: true ::: sopa.io.phenocycler - options: - show_root_heading: true ::: sopa.io.hyperion - options: - show_root_heading: true + +::: sopa.io.aicsimageio ::: sopa.io.ome_tif - options: - show_root_heading: true ::: sopa.io.wsi - options: - show_root_heading: true ::: sopa.io.uniform - options: - show_root_heading: true ::: sopa.io.blobs - options: - show_root_heading: true + +## Xenium Explorer + +::: sopa.io.write + +::: sopa.io.align + +::: sopa.io.add_explorer_selection + +::: sopa.io.int_cell_id + +::: sopa.io.str_cell_id + +::: sopa.io.write_image + +::: sopa.io.save_column_csv + +## Report + +::: sopa.io.write_report diff --git a/docs/api/io.report.md b/docs/api/io.report.md deleted file mode 100644 index fdf61a1f..00000000 --- a/docs/api/io.report.md +++ /dev/null @@ -1,59 +0,0 @@ -::: sopa.io.report.write_report - options: - show_root_heading: true - -::: sopa.io.report.engine.Renderable - options: - show_root_heading: true - -::: sopa.io.report.engine.Title - options: - show_root_heading: true - -::: sopa.io.report.engine.Paragraph - options: - show_root_heading: true - -::: sopa.io.report.engine.Message - options: - show_root_heading: true - -::: sopa.io.report.engine.Block - options: - show_root_heading: true - -::: sopa.io.report.engine.CodeBlock - options: - show_root_heading: true - -::: sopa.io.report.engine.ProgressBar - options: - show_root_heading: true - -::: sopa.io.report.engine.Section - options: - show_root_heading: true - -::: sopa.io.report.engine.SubSection - options: - show_root_heading: true - -::: sopa.io.report.engine.NavbarItem - options: - show_root_heading: true - -::: sopa.io.report.engine.Navbar - options: - show_root_heading: true - -::: sopa.io.report.engine.Columns - options: - show_root_heading: true - -::: sopa.io.report.engine.Image - options: - show_root_heading: true - -::: sopa.io.report.engine.Root - options: - show_root_heading: true diff --git a/docs/api/segmentation/aggregate.md b/docs/api/segmentation/aggregate.md index 79d51e17..75223a61 100644 --- a/docs/api/segmentation/aggregate.md +++ b/docs/api/segmentation/aggregate.md @@ -1,23 +1,11 @@ ::: sopa.segmentation.aggregate.overlay_segmentation - options: - show_root_heading: true ::: sopa.segmentation.aggregate.average_channels - options: - show_root_heading: true ::: sopa.segmentation.aggregate._average_channels_aligned - options: - show_root_heading: true ::: sopa.segmentation.aggregate.count_transcripts - options: - show_root_heading: true ::: sopa.segmentation.aggregate._count_transcripts_aligned - options: - show_root_heading: true ::: sopa.segmentation.aggregate.Aggregator - options: - show_root_heading: true 
diff --git a/docs/api/segmentation/baysor.md b/docs/api/segmentation/baysor.md index 9704226e..f3b20822 100644 --- a/docs/api/segmentation/baysor.md +++ b/docs/api/segmentation/baysor.md @@ -1,3 +1 @@ ::: sopa.segmentation.baysor.resolve.resolve - options: - show_root_heading: true diff --git a/docs/api/segmentation/methods.md b/docs/api/segmentation/methods.md index 7392dc8b..4ac52707 100644 --- a/docs/api/segmentation/methods.md +++ b/docs/api/segmentation/methods.md @@ -1,3 +1 @@ ::: sopa.segmentation.methods.cellpose_patch - options: - show_root_heading: true diff --git a/docs/api/segmentation/patching.md b/docs/api/segmentation/patching.md index 048cc26b..80db969d 100644 --- a/docs/api/segmentation/patching.md +++ b/docs/api/segmentation/patching.md @@ -1,3 +1 @@ ::: sopa.segmentation.patching.Patches2D - options: - show_root_heading: true diff --git a/docs/api/segmentation/shapes.md b/docs/api/segmentation/shapes.md index eb6b217c..59443d0f 100644 --- a/docs/api/segmentation/shapes.md +++ b/docs/api/segmentation/shapes.md @@ -1,11 +1,5 @@ ::: sopa.segmentation.shapes.solve_conflicts - options: - show_root_heading: true ::: sopa.segmentation.shapes.geometrize - options: - show_root_heading: true ::: sopa.segmentation.shapes.rasterize - options: - show_root_heading: true diff --git a/docs/api/segmentation/stainings.md b/docs/api/segmentation/stainings.md index 8e59f172..31ccfd93 100644 --- a/docs/api/segmentation/stainings.md +++ b/docs/api/segmentation/stainings.md @@ -1,3 +1 @@ ::: sopa.segmentation.stainings.StainingSegmentation - options: - show_root_heading: true diff --git a/docs/api/segmentation/tissue.md b/docs/api/segmentation/tissue.md index 80cbedd4..d6f2eb63 100644 --- a/docs/api/segmentation/tissue.md +++ b/docs/api/segmentation/tissue.md @@ -1,3 +1 @@ ::: sopa.segmentation.tissue.hsv_otsu - options: - show_root_heading: true diff --git a/docs/api/spatial.md b/docs/api/spatial.md index b7823249..787b3e04 100644 --- a/docs/api/spatial.md +++ b/docs/api/spatial.md @@ -1,27 +1,13 @@ ::: sopa.spatial.sjoin - options: - show_root_heading: true ::: sopa.spatial.mean_distance - options: - show_root_heading: true ::: sopa.spatial.geometrize_niches - options: - show_root_heading: true ::: sopa.spatial.niches_geometry_stats - options: - show_root_heading: true ::: sopa.spatial.cells_to_groups - options: - show_root_heading: true ::: sopa.spatial.spatial_neighbors - options: - show_root_heading: true ::: sopa.spatial.prepare_network - options: - show_root_heading: true diff --git a/docs/api/utils/image.md b/docs/api/utils/image.md index 3c82fed1..99dc9ff8 100644 --- a/docs/api/utils/image.md +++ b/docs/api/utils/image.md @@ -1,11 +1,5 @@ ::: sopa.utils.image.scale_dtype - options: - show_root_heading: true ::: sopa.utils.image.resize - options: - show_root_heading: true ::: sopa.utils.image.resize_numpy - options: - show_root_heading: true diff --git a/docs/api/utils/polygon_crop.md b/docs/api/utils/polygon_crop.md index 8033eb97..a1ea0631 100644 --- a/docs/api/utils/polygon_crop.md +++ b/docs/api/utils/polygon_crop.md @@ -1,3 +1 @@ ::: sopa.utils.polygon_crop.polygon_selection - options: - show_root_heading: true diff --git a/docs/tutorials/align.md b/docs/tutorials/align.md index fbad2bbc..7b9fe706 100644 --- a/docs/tutorials/align.md +++ b/docs/tutorials/align.md @@ -5,11 +5,10 @@ Convert your image with QuPath as written in this [10x genomics webpage](https:/ If you are not familiar with QuPath, you can also use our API to write the image: ```python -from sopa import io -from 
sopa.io.explorer import write_image +import sopa.io -image = io.ome_tif("path/to/your/image.tif") # or use a different reader -write_image("where/to/save/image.ome.tif", image, is_dir=False) +image = sopa.io.ome_tif("path/to/your/image.tif") # or use a different reader +sopa.io.write_image("where/to/save/image.ome.tif", image, is_dir=False) ``` !!! note "Xenium users" diff --git a/mkdocs.yml b/mkdocs.yml index cac50b3f..6718bf24 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -31,11 +31,7 @@ nav: - sopa.segmentation.tissue: api/segmentation/tissue.md - sopa.segmentation.baysor: api/segmentation/baysor.md - sopa.io: api/io.md - - sopa.io.explorer: api/io.explorer.md - - sopa.io.report: api/io.report.md - - sopa.annotation: - - sopa.annotation.tangram: api/annotation/tangram.md - - sopa.annotation.fluorescence: api/annotation/fluorescence.md + - sopa.annotation: api/annotation.md - sopa.utils: - sopa.utils.image: api/utils/image.md - sopa.utils.polygon_crop: api/utils/polygon_crop.md @@ -45,7 +41,12 @@ nav: - Cite us: cite_us.md plugins: - search - - mkdocstrings + - mkdocstrings: + handlers: + python: + options: + show_root_heading: true + heading_level: 3 - mkdocs-jupyter: include_source: True markdown_extensions: diff --git a/pyproject.toml b/pyproject.toml index 657797fd..dbe2a0ea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "sopa" -version = "1.0.10" +version = "1.0.11" description = "Spatial-omics pipeline and analysis" documentation = "https://gustaveroussy.github.io/sopa" homepage = "https://gustaveroussy.github.io/sopa" diff --git a/sopa/annotation/__init__.py b/sopa/annotation/__init__.py index b9a503ef..26b88e85 100644 --- a/sopa/annotation/__init__.py +++ b/sopa/annotation/__init__.py @@ -1 +1,2 @@ from .fluorescence import preprocess_fluo, higher_z_score +from .tangram import tangram_annotate diff --git a/sopa/annotation/tangram/run.py b/sopa/annotation/tangram/run.py index b0e95185..f63d9447 100644 --- a/sopa/annotation/tangram/run.py +++ b/sopa/annotation/tangram/run.py @@ -10,13 +10,6 @@ from anndata import AnnData from spatialdata import SpatialData -try: - import tangram as tg -except ImportError: - raise ImportError( - "To use tangram, you need its corresponding sopa extra: `pip install 'sopa[tangram]'` (normal mode) or `pip install -e '.[tangram]'` (if using snakemake)" - ) - from sopa._constants import SopaKeys log = logging.getLogger(__name__) @@ -216,6 +209,13 @@ def run(self): ] def run_group(self, level: int = 0, indices_sp=None, indices_sc=None): + try: + import tangram as tg + except ImportError: + raise ImportError( + "To use tangram, you need its corresponding sopa extra: `pip install 'sopa[tangram]'` (normal mode) or `pip install -e '.[tangram]'` (if using snakemake)" + ) + if indices_sp is not None and len(indices_sp) == 0: log.warn("No cell annotated in the upper level...") return diff --git a/sopa/embedding/patches.py b/sopa/embedding/patches.py index bb6b8bb4..f843ed61 100644 --- a/sopa/embedding/patches.py +++ b/sopa/embedding/patches.py @@ -38,10 +38,11 @@ def _get_best_level_for_downsample( def _get_extraction_parameters( - tiff_metadata: dict, + slide_metadata: dict, patch_width: int, level: int | None, magnification: int | None, + backend: str, ) -> tuple[int, int, int, bool]: """ Given the metadata for the slide, a target magnification and a patch width, @@ -55,20 +56,20 @@ def _get_extraction_parameters( if magnification is None: return level, 1, patch_width * 2**level, True # TODO: what if scaling != 2? 
- if tiff_metadata["properties"].get("tiffslide.objective-power"): - objective_power = int(tiff_metadata["properties"].get("tiffslide.objective-power")) + if slide_metadata["properties"].get(f"{backend}.objective-power"): + objective_power = int(slide_metadata["properties"].get(f"{backend}.objective-power")) downsample = objective_power / magnification - elif tiff_metadata["properties"].get("tiffslide.mpp-x"): - mppx = float(tiff_metadata["properties"].get("tiffslide.mpp-x")) + elif slide_metadata["properties"].get(f"{backend}.mpp-x"): + mppx = float(slide_metadata["properties"].get(f"{backend}.mpp-x")) mpp_objective = min([80, 40, 20, 10, 5], key=lambda obj: abs(10 / obj - mppx)) downsample = mpp_objective / magnification else: return None, None, None, False - level = _get_best_level_for_downsample(tiff_metadata["level_downsamples"], downsample) - resize_factor = tiff_metadata["level_downsamples"][level] / downsample + level = _get_best_level_for_downsample(slide_metadata["level_downsamples"], downsample) + resize_factor = slide_metadata["level_downsamples"][level] / downsample patch_width = int(patch_width * downsample) return level, resize_factor, patch_width, True @@ -90,9 +91,9 @@ def __init__( self.cs = get_intrinsic_cs(None, image) - tiff_metadata = image.attrs.get("metadata", {}) + slide_metadata = image.attrs.get("metadata", {}) self.level, self.resize_factor, self.patch_width, success = _get_extraction_parameters( - tiff_metadata, patch_width, level, magnification + slide_metadata, patch_width, level, magnification, backend=image.attrs["backend"] ) if not success: log.error("Error retrieving the image mpp, skipping tile embedding.") diff --git a/sopa/io/__init__.py b/sopa/io/__init__.py index 91a82879..06bd492f 100644 --- a/sopa/io/__init__.py +++ b/sopa/io/__init__.py @@ -1,4 +1,17 @@ -from .explorer import write, align, add_explorer_selection +from .explorer import ( + write, + align, + add_explorer_selection, + write_image, + write_transcripts, + write_cell_categories, + write_gene_counts, + write_polygons, + write_metadata, + str_cell_id, + int_cell_id, + save_column_csv, +) from .standardize import write_standardized from .reader.cosmx import cosmx from .reader.merscope import merscope @@ -8,6 +21,7 @@ from .reader.hyperion import hyperion from .reader.utils import ome_tif from .reader.wsi import wsi, wsi_autoscale +from .reader.aics import aicsimageio from .report import write_report from ..utils.data import blobs, uniform diff --git a/sopa/io/reader/aics.py b/sopa/io/reader/aics.py new file mode 100644 index 00000000..11288b29 --- /dev/null +++ b/sopa/io/reader/aics.py @@ -0,0 +1,58 @@ +from __future__ import annotations + +import logging +from pathlib import Path + +import xarray as xr +from spatialdata import SpatialData +from spatialdata.models import Image2DModel + +from .utils import _default_image_kwargs + +log = logging.getLogger(__name__) + + +def aicsimageio( + path: Path, + z_stack: int = 0, + image_models_kwargs: dict | None = None, + aics_kwargs: dict | None = None, +) -> SpatialData: + """Read an image using [AICSImageIO](https://github.com/AllenCellModeling/aicsimageio). It supports special formats such as `ND2`, `CZI`, `LIF`, or `DV`. + + !!! note "Extra dependencies" + To use this reader, you'll need the `aicsimageio` dependency (`pip install aicsimageio`). To read `.czi` images, you'll also need to install `aicspylibczi` (for instance `pip install aicspylibczi`). 
+ + Args: + path: Path to the image file + z_stack: (Only for 3D images) Index of the stack in the z-axis to use. + image_models_kwargs: Keyword arguments passed to `spatialdata.models.Image2DModel`. + aics_kwargs: Keyword arguments passed to `aicsimageio.AICSImage`. + + Returns: + A `SpatialData` object with a 2D-image of shape `(C, Y, X)` + """ + image_models_kwargs, _ = _default_image_kwargs(image_models_kwargs, None) + aics_kwargs = {} if aics_kwargs is None else aics_kwargs + + try: + from aicsimageio import AICSImage + except ImportError: + raise ImportError( + "You need to install aicsimageio, e.g. by running `pip install aicsimageio`" + ) + + xarr: xr.DataArray = AICSImage(path, **aics_kwargs).xarray_dask_data + + assert ( + len(xarr.coords["T"]) == 1 + ), f"Only one time dimension is supported, found {len(xarr.coords['T'])}." + + if len(xarr.coords["Z"]) > 1: + log.info(f"3D image found, only reading {z_stack=}") + + xarr = xarr.isel(T=0, Z=z_stack).rename({"C": "c", "Y": "y", "X": "x"}) + + image = Image2DModel.parse(xarr, c_coords=xarr.coords["c"].values, **image_models_kwargs) + + return SpatialData(images={"image": image}) diff --git a/sopa/io/reader/wsi.py b/sopa/io/reader/wsi.py index f924a856..87baec1b 100644 --- a/sopa/io/reader/wsi.py +++ b/sopa/io/reader/wsi.py @@ -12,7 +12,10 @@ def wsi( - path: str | Path, chunks: tuple[int, int, int] = (3, 256, 256), as_image: bool = False + path: str | Path, + chunks: tuple[int, int, int] = (3, 256, 256), + as_image: bool = False, + backend: str = "tiffslide", ) -> SpatialData: """Read a WSI into a `SpatialData` object Args: path: Path to the WSI chunks: Tuple representing the chunksize for the dimensions `(C, Y, X)`. as_image: If `True`, returns a, image instead of a `SpatialData` object + backend: The library to use as a backend in order to load the WSI. One of: `"openslide"`, `"tiffslide"`. 
Returns: A `SpatialData` object with a multiscale 2D-image of shape `(C, Y, X)` """ - image_name, img, tiff, tiff_metadata = _open_wsi(path) + image_name, img, slide, slide_metadata = _open_wsi(path, backend=backend) images = {} for level, key in enumerate(list(img.keys())): @@ -35,10 +39,10 @@ def wsi( dims=("c", "y", "x"), ).chunk(chunks) - scale_factor = tiff.level_downsamples[level] + scale_factor = slide.level_downsamples[level] scale_image = Image2DModel.parse( - scale_image, + scale_image[:3, :, :], transformations={"pixels": _get_scale_transformation(scale_factor)}, c_coords=("r", "g", "b"), ) @@ -48,15 +52,14 @@ def wsi( images[f"scale{key}"] = scale_image multiscale_image = MultiscaleSpatialImage.from_dict(images) - multiscale_image.attrs["metadata"] = tiff_metadata + sdata = SpatialData(images={image_name: multiscale_image}) + sdata[image_name].attrs["metadata"] = slide_metadata + sdata[image_name].attrs["backend"] = backend + sdata[image_name].name = image_name if as_image: - multiscale_image.name = image_name return multiscale_image - sdata = SpatialData(images={image_name: multiscale_image}) - sdata[image_name].attrs["metadata"] = tiff_metadata - return sdata @@ -108,23 +111,38 @@ def _default_image_models_kwargs(image_models_kwargs: dict | None) -> dict: return image_models_kwargs -def _open_wsi(path: str | Path) -> tuple[str, xarray.Dataset, Any, dict]: - import tiffslide - +def _open_wsi( + path: str | Path, backend: str = "openslide" +) -> tuple[str, xarray.Dataset, Any, dict]: image_name = Path(path).stem - tiff = tiffslide.open_slide(path) - img = xarray.open_zarr( - tiff.zarr_group.store, - consolidated=False, - mask_and_scale=False, - ) + if backend == "tiffslide": + import tiffslide + + slide = tiffslide.open_slide(path) + zarr_store = slide.zarr_group.store + zarr_img = xarray.open_zarr( + zarr_store, + consolidated=False, + mask_and_scale=False, + ) + elif backend == "openslide": + import openslide + + from ...utils.io import OpenSlideStore + + slide = openslide.open_slide(path) + zarr_store = OpenSlideStore(path).store + else: + raise ValueError("Invalid backend. 
Supported options are 'openslide' and 'tiffslide'.") + + zarr_img = xarray.open_zarr(zarr_store, consolidated=False, mask_and_scale=False) - tiff_metadata = { - "properties": tiff.properties, - "dimensions": tiff.dimensions, - "level_count": tiff.level_count, - "level_dimensions": tiff.level_dimensions, - "level_downsamples": tiff.level_downsamples, + metadata = { + "properties": slide.properties, + "dimensions": slide.dimensions, + "level_count": slide.level_count, + "level_dimensions": slide.level_dimensions, + "level_downsamples": slide.level_downsamples, } - return image_name, img, tiff, tiff_metadata + return image_name, zarr_img, slide, metadata diff --git a/sopa/segmentation/tissue.py b/sopa/segmentation/tissue.py index 35a5cb00..3e7111d5 100644 --- a/sopa/segmentation/tissue.py +++ b/sopa/segmentation/tissue.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import logging import geopandas as gpd diff --git a/sopa/utils/io.py b/sopa/utils/io.py new file mode 100644 index 00000000..4ed2d0f9 --- /dev/null +++ b/sopa/utils/io.py @@ -0,0 +1,154 @@ +# Adapted from https://github.com/manzt/napari-lazy-openslide/tree/main +from ctypes import ArgumentError +from pathlib import Path +from typing import Any, Dict, Mapping, MutableMapping + +import numpy as np +from openslide import OpenSlide +from zarr.storage import ( + KVStore, + Store, + _path_to_prefix, + attrs_key, + init_array, + init_group, +) +from zarr.util import json_dumps, normalize_shape, normalize_storage_path + + +def init_attrs(store: MutableMapping, attrs: Mapping[str, Any], path: str = None): + path = normalize_storage_path(path) + path = _path_to_prefix(path) + store[path + attrs_key] = json_dumps(attrs) + + +def create_meta_store(slide: OpenSlide, tilesize: int) -> Dict[str, bytes]: + """Creates a dict containing the zarr metadata for the multiscale openslide image.""" + store = dict() + root_attrs = { + "multiscales": [ + { + "name": Path(slide._filename).name, + "datasets": [{"path": str(i)} for i in range(slide.level_count)], + "version": "0.1", + } + ], + "metadata": dict(slide.properties), + } + init_group(store) + init_attrs(store, root_attrs) + + for i, (x, y) in enumerate(slide.level_dimensions): + init_array( + store, + path=str(i), + shape=normalize_shape((y, x, 4)), + chunks=(tilesize, tilesize, 4), + fill_value=0, + dtype="|u1", + compressor=None, + ) + suffix = str(i) if i != 0 else "" + init_attrs(store, {"_ARRAY_DIMENSIONS": [f"Y{suffix}", f"X{suffix}", "S"]}, path=str(i)) + return store + + +def _parse_chunk_path(path: str): + """Returns x,y chunk coords and pyramid level from string key""" + level, ckey = path.split("/") + y, x, _ = map(int, ckey.split(".")) + return x, y, int(level) + + +class OpenSlideStore(Store): + """Wraps an OpenSlide object as a multiscale Zarr Store. + + Parameters + ---------- + path: str + The file to open with OpenSlide. + tilesize: int + Desired "chunk" size for zarr store (default: 512). 
+ """ + + def __init__(self, path: str, tilesize: int = 512): + self._path = path + self._slide = OpenSlide(path) + self._tilesize = tilesize + self._store = create_meta_store(self._slide, tilesize) + self._writeable = False + self._erasable = False + + def __getitem__(self, key: str): + if key in self._store: + # key is for metadata + return self._store[key] + + # key should now be a path to an array chunk + # e.g '3/4.5.0' -> '/' + try: + x, y, level = _parse_chunk_path(key) + location = self._ref_pos(x, y, level) + size = (self._tilesize, self._tilesize) + tile = self._slide.read_region(location, level, size) + except ArgumentError as err: + # Can occur if trying to read a closed slide + raise err + except Exception: + # TODO: probably need better error handling. + # If anything goes wrong, we just signal the chunk + # is missing from the store. + raise KeyError(key) + return np.array(tile) # .tobytes() + + def __eq__(self, other): + return isinstance(other, OpenSlideStore) and self._slide._filename == other._slide._filename + + def __setitem__(self, key, val): + raise PermissionError("ZarrStore is read-only") + + def __delitem__(self, key): + raise PermissionError("ZarrStore is read-only") + + def __iter__(self): + return iter(self.keys()) + + def __len__(self): + return sum(1 for _ in self) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.close() + + def _ref_pos(self, x: int, y: int, level: int): + dsample = self._slide.level_downsamples[level] + xref = int(x * dsample * self._tilesize) + yref = int(y * dsample * self._tilesize) + return xref, yref + + def keys(self): + return self._store.keys() + + def close(self): + self._slide.close() + + # Retrieved from napari-lazy-openslide PR#16 + def __getstate__(self): + return {"_path": self._path, "_tilesize": self._tilesize} + + def __setstate__(self, newstate): + path = newstate["_path"] + tilesize = newstate["_tilesize"] + self.__init__(path, tilesize) + + def rename(self): + raise PermissionError(f'{type(self)} is not erasable, cannot call "rename"') + + def rmdir(self, path: str = "") -> None: + raise PermissionError(f'{type(self)} is not erasable, cannot call "rmdir"') + + @property + def store(self) -> KVStore: + return KVStore(self)