From 706dea93d0fb0a2fd74a5aeb1d0854d129aa2990 Mon Sep 17 00:00:00 2001 From: Blampey Quentin Date: Sun, 18 Aug 2024 14:43:09 +0200 Subject: [PATCH 1/3] minor fix docs --- docs/api/segmentation/aggregate.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/api/segmentation/aggregate.md b/docs/api/segmentation/aggregate.md index 75223a61..6bee9935 100644 --- a/docs/api/segmentation/aggregate.md +++ b/docs/api/segmentation/aggregate.md @@ -1,11 +1,11 @@ -::: sopa.segmentation.aggregate.overlay_segmentation +::: sopa.segmentation.aggregation.overlay_segmentation -::: sopa.segmentation.aggregate.average_channels +::: sopa.segmentation.aggregation.average_channels -::: sopa.segmentation.aggregate._average_channels_aligned +::: sopa.segmentation.aggregation._average_channels_aligned -::: sopa.segmentation.aggregate.count_transcripts +::: sopa.segmentation.aggregation.count_transcripts -::: sopa.segmentation.aggregate._count_transcripts_aligned +::: sopa.segmentation.aggregation._count_transcripts_aligned -::: sopa.segmentation.aggregate.Aggregator +::: sopa.segmentation.aggregation.Aggregator From 6ccf51b95a15c5d1f56cb2b60b7b5e34ebe29670 Mon Sep 17 00:00:00 2001 From: Blampey Quentin Date: Sun, 18 Aug 2024 14:53:47 +0200 Subject: [PATCH 2/3] changelog version 1.1.3 --- CHANGELOG.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 10d298d7..599c15e6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ ## [1.x.x] - 2024-xx-xx + + +## [1.1.3] - 2024-08-18 + ### Fix - Fixed aggregation issue when gene names are `NaN` or `None` (#101) - Fix Xenium reader for old Xenium data format (#105) @@ -7,11 +11,11 @@ ### Added - Support multipolygons in ROI rasterization - Added bins aggregation -- Added Visium HD reader (tutorial comming soon) +- Added Visium HD reader (tutorial coming soon) ### Changed - Import submodules in init (segmentation, io, utils) -- API simplification in progress (new API + tutorial comming soon) +- API simplification in progress (new API + tutorial coming soon) ## [1.1.2] - 2024-07-24 From d440a10e21fd58f65bf83565782e67bbb25870d9 Mon Sep 17 00:00:00 2001 From: Blampey Quentin Date: Wed, 21 Aug 2024 15:09:22 +0200 Subject: [PATCH 3/3] hot fixes --- CHANGELOG.md | 5 ++ pyproject.toml | 2 +- sopa/_constants.py | 1 + sopa/_sdata.py | 41 +++++++++--- sopa/cli/patchify.py | 2 +- sopa/io/reader/visium_hd.py | 12 +++- sopa/io/reader/xenium.py | 3 +- sopa/patches/patches.py | 30 ++++++++- sopa/utils/data.py | 3 +- .../config/toy/uniform_baysor_vizgen.yaml | 62 +++++++++++++++++++ 10 files changed, 146 insertions(+), 15 deletions(-) create mode 100644 workflow/config/toy/uniform_baysor_vizgen.yaml diff --git a/CHANGELOG.md b/CHANGELOG.md index 599c15e6..0bd247cf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ ## [1.x.x] - 2024-xx-xx +## [1.1.4] - 2024-08-21 + +### Hotfix +- Fixed Baysor issue on MERSCOPE data with the Vizgen prior +- Fix patch-maker issue due to new API temporary implementation ## [1.1.3] - 2024-08-18 diff --git a/pyproject.toml b/pyproject.toml index f8ff8f01..3feefa86 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "sopa" -version = "1.1.3" +version = "1.1.4" description = "Spatial-omics pipeline and analysis" documentation = "https://gustaveroussy.github.io/sopa" homepage = "https://gustaveroussy.github.io/sopa" diff --git a/sopa/_constants.py b/sopa/_constants.py index 61e07e38..63fd5d70 100644 --- a/sopa/_constants.py +++ b/sopa/_constants.py @@ -58,6 +58,7 @@ class ROI: class SopaFiles: SOPA_CACHE_DIR = ".sopa_cache" + TRANSCRIPT_TEMP_DIR = "transcript_patches" PATCHES_FILE_IMAGE = "patches_file_image" PATCHES_DIRS_BAYSOR = "patches_file_baysor" PATCHES_DIRS_COMSEG = "patches_file_comseg" diff --git a/sopa/_sdata.py b/sopa/_sdata.py index 21020e5e..8ec923d5 100644 --- a/sopa/_sdata.py +++ b/sopa/_sdata.py @@ -1,18 +1,19 @@ from __future__ import annotations import logging -from typing import Iterator +from pathlib import Path +from typing import Any, Iterator import geopandas as gpd import pandas as pd -import xarray as xr +from anndata import AnnData from datatree import DataTree from spatialdata import SpatialData from spatialdata.models import SpatialElement from spatialdata.transformations import Identity, get_transformation, set_transformation from xarray import DataArray -from ._constants import SopaAttrs, SopaKeys +from ._constants import SopaAttrs, SopaFiles, SopaKeys log = logging.getLogger(__name__) @@ -112,14 +113,14 @@ def get_intensities(sdata: SpatialData) -> pd.DataFrame | None: return adata.to_df() -def iter_scales(image: DataTree) -> Iterator[xr.DataArray]: +def iter_scales(image: DataTree) -> Iterator[DataArray]: """Iterates through all the scales of a `DataTree` Args: image: a `DataTree` Yields: - Each scale (as a `xr.DataArray`) + Each scale (as a `DataArray`) """ assert isinstance(image, DataTree), f"Multiscale iteration is reserved for type DataTree. Found {type(image)}" @@ -154,7 +155,7 @@ def get_spatial_element( if len(element_dict) == 1: key = next(iter(element_dict.keys())) - assert valid_attr is None or element_dict[key].attrs.get( + assert valid_attr is None or _get_spatialdata_attrs(element_dict[key]).get( valid_attr, True ), f"Element {key} is not valid for the attribute {valid_attr}." @@ -162,7 +163,7 @@ def get_spatial_element( assert valid_attr is not None, "Multiple elements found. Provide an element key." - keys = [key for key, element in element_dict.items() if element.attrs.get(valid_attr)] + keys = [key for key, element in element_dict.items() if _get_spatialdata_attrs(element).get(valid_attr)] assert len(keys) > 0, f"No element with the attribute {valid_attr}. Provide an element key." assert len(keys) == 1, f"Multiple valid elements found: {keys}. Provide an element key." @@ -170,6 +171,26 @@ def get_spatial_element( return _return_element(element_dict, keys[0], return_key, as_spatial_image) +def _get_spatialdata_attrs(element: SpatialElement) -> dict[str, Any]: + if isinstance(element, DataTree): + element = next(iter(element["scale0"].values())) + return element.attrs.get("spatialdata_attrs", {}) + + +def _update_spatialdata_attrs(element: SpatialElement, attrs: dict): + if isinstance(element, DataTree): + for image_scale in iter_scales(element): + _update_spatialdata_attrs(image_scale, attrs) + return + + old_attrs = element.uns if isinstance(element, AnnData) else element.attrs + + if "spatialdata_attrs" not in old_attrs: + old_attrs["spatialdata_attrs"] = {} + + old_attrs["spatialdata_attrs"].update(attrs) + + def get_spatial_image( sdata: SpatialData, key: str | None = None, @@ -205,3 +226,9 @@ def _return_element( element = next(iter(element["scale0"].values())) return (key, element) if return_key else element + + +def get_cache_dir(sdata: SpatialData) -> Path: + assert sdata.is_backed(), "SpatialData not saved on-disk. Save the object, or provide a cache directory." + + return sdata.path / SopaFiles.SOPA_CACHE_DIR diff --git a/sopa/cli/patchify.py b/sopa/cli/patchify.py index dedd6ac6..87025509 100644 --- a/sopa/cli/patchify.py +++ b/sopa/cli/patchify.py @@ -202,7 +202,7 @@ def _patchify_transcripts( config or config_path is not None ), "Provide '--config-path', the path to a Baysor config file (toml) or comseg file (jsons)" - df_key = get_spatial_element(sdata.points) + df_key, _ = get_spatial_element(sdata.points, return_key=True) patches = Patches2D(sdata, df_key, patch_width_microns, patch_overlap_microns) valid_indices = patches.patchify_transcripts( temp_dir, diff --git a/sopa/io/reader/visium_hd.py b/sopa/io/reader/visium_hd.py index 99b5b1e3..88592831 100644 --- a/sopa/io/reader/visium_hd.py +++ b/sopa/io/reader/visium_hd.py @@ -7,6 +7,7 @@ from spatialdata_io.readers.visium_hd import visium_hd as visium_hd_spatialdata_io from ..._constants import SopaAttrs +from ..._sdata import _update_spatialdata_attrs from ...utils import string_channel_names from .utils import _default_image_kwargs @@ -44,14 +45,19 @@ def visium_hd( string_channel_names(sdata) # Ensure that channel names are strings + ### Add Sopa attributes to detect the spatial elements for key, image in sdata.images.items(): if key.endswith("_full_image"): - image.attrs[SopaAttrs.CELL_SEGMENTATION] = True + _update_spatialdata_attrs(image, {SopaAttrs.CELL_SEGMENTATION: True}) elif key.endswith("_hires_image"): - image.attrs[SopaAttrs.TISSUE_SEGMENTATION] = True + _update_spatialdata_attrs(image, {SopaAttrs.TISSUE_SEGMENTATION: True}) for key, geo_df in sdata.shapes.items(): if key.endswith("_002um"): - geo_df.attrs[SopaAttrs.BINS_AGGREGATION] = True + _update_spatialdata_attrs(geo_df, {SopaAttrs.BINS_AGGREGATION: True}) + + for key, table in sdata.tables.items(): + if key.endswith("_002um"): + _update_spatialdata_attrs(table, {SopaAttrs.BINS_TABLE: True}) return sdata diff --git a/sopa/io/reader/xenium.py b/sopa/io/reader/xenium.py index 167766a8..07ca46d3 100644 --- a/sopa/io/reader/xenium.py +++ b/sopa/io/reader/xenium.py @@ -7,6 +7,7 @@ from spatialdata_io.readers.xenium import xenium as xenium_spatialdata_io from ..._constants import SopaAttrs +from ..._sdata import _update_spatialdata_attrs from ...utils import string_channel_names from .utils import _default_image_kwargs @@ -56,6 +57,6 @@ def xenium( for key, image in sdata.images.items(): if key.startswith("morphology"): - image.attrs[SopaAttrs.CELL_SEGMENTATION] = True + _update_spatialdata_attrs(image, {SopaAttrs.CELL_SEGMENTATION: True}) return sdata diff --git a/sopa/patches/patches.py b/sopa/patches/patches.py index 8fe245ca..3e285014 100644 --- a/sopa/patches/patches.py +++ b/sopa/patches/patches.py @@ -20,6 +20,7 @@ from .._constants import EPS, ROI, SopaFiles, SopaKeys from .._sdata import ( get_boundaries, + get_cache_dir, get_spatial_element, get_spatial_image, to_intrinsic, @@ -435,6 +436,33 @@ def _assign_prior(series: dd.Series, unassigned_value: int | str | None) -> pd.S if series.dtype == "int": if unassigned_value is None or unassigned_value == 0: return series - return series.replace(unassigned_value, 0) + return series.replace(int(unassigned_value), 0) raise ValueError(f"Invalid dtype {series.dtype} for prior cell ids. Must be int or string.") + + +def make_image_patches( + sdata: SpatialData, patch_width: int = 2000, patch_overlap: int = 50, image_key: str | None = None +): + image_key, _ = get_spatial_image(sdata, key=image_key, return_key=True) + patches = Patches2D(sdata, image_key, patch_width=patch_width, patch_overlap=patch_overlap) + + patches.write() + + +def make_transcript_patches( + sdata: SpatialData, + config: dict = {}, + patch_width: int = 2000, + patch_overlap: int = 50, + points_key: str | None = None, + cache_dir: str | Path | None = None, +) -> list[int]: + points_key, _ = get_spatial_element(sdata, key=points_key, return_key=True) + patches = Patches2D(sdata, points_key, patch_width=patch_width, patch_overlap=patch_overlap) + + cache_dir = Path(cache_dir or get_cache_dir(sdata)) / SopaFiles.TRANSCRIPT_TEMP_DIR + + valid_indices = patches.patchify_transcripts(cache_dir, config=config) + + return valid_indices diff --git a/sopa/utils/data.py b/sopa/utils/data.py index 5b667744..a2807b8b 100644 --- a/sopa/utils/data.py +++ b/sopa/utils/data.py @@ -37,6 +37,7 @@ def uniform( include_image: bool = True, apply_blur: bool = True, as_output: bool = False, + transcript_cell_id_as_merscope: bool = False, ) -> SpatialData: """Generate a dummy dataset composed of cells generated uniformly in a square. It also has transcripts. @@ -149,7 +150,7 @@ def uniform( sdata = SpatialData(images=images, points=points, shapes=shapes) _map_transcript_to_cell(sdata, "cell_id", sdata["transcripts"], sdata["cells"]) - sdata["transcripts"]["cell_id"] = sdata["transcripts"]["cell_id"].astype(int) + sdata["transcripts"]["cell_id"] = sdata["transcripts"]["cell_id"].astype(int) - int(transcript_cell_id_as_merscope) if as_output: _add_table(sdata) diff --git a/workflow/config/toy/uniform_baysor_vizgen.yaml b/workflow/config/toy/uniform_baysor_vizgen.yaml new file mode 100644 index 00000000..a6b9db07 --- /dev/null +++ b/workflow/config/toy/uniform_baysor_vizgen.yaml @@ -0,0 +1,62 @@ +# For parameters details, see this commented example: https://github.com/gustaveroussy/sopa/blob/master/workflow/config/example_commented.yaml +read: + technology: uniform + kwargs: + transcript_cell_id_as_merscope: true + +patchify: + patch_width_pixel: 1200 + patch_overlap_pixel: 50 + patch_width_microns: 400 + patch_overlap_microns: 20 + +segmentation: + baysor: + min_area: 10 + cell_key: "cell_id" + unassigned_value: -1 + + config: + data: + force_2d: true + min_molecules_per_cell: 10 + x: "x" + y: "y" + z: "z" + gene: "genes" + min_molecules_per_gene: 0 + min_molecules_per_segment: 3 + confidence_nn_id: 6 + + segmentation: + scale: 3 # typical cell radius + scale_std: "25%" # cell radius standard deviation + prior_segmentation_confidence: 0 + estimate_scale_from_centers: false + n_clusters: 4 + iters: 500 + n_cells_init: 0 + nuclei_genes: "" + cyto_genes: "" + new_component_weight: 0.2 + new_component_fraction: 0.3 + +aggregate: + average_intensities: true + min_transcripts: 5 # [optional] cells whose transcript count is below that this threshold are filtered + +annotation: + method: fluorescence + args: + marker_cell_dict: + CK: Tumoral cell + CD3: T cell + CD20: B cell + +explorer: + gene_column: "genes" + ram_threshold_gb: 4 + pixel_size: 0.1 + +executables: + baysor: ~/.julia/bin/baysor