Skip to content

Commit

Permalink
Merge pull request #111 from gustaveroussy/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
quentinblampey authored Aug 21, 2024
2 parents 18da9a2 + d440a10 commit f152ab1
Show file tree
Hide file tree
Showing 11 changed files with 158 additions and 23 deletions.
13 changes: 11 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,17 +1,26 @@
## [1.x.x] - 2024-xx-xx

## [1.1.4] - 2024-08-21

### Hotfix
- Fixed Baysor issue on MERSCOPE data with the Vizgen prior
- Fix patch-maker issue due to new API temporary implementation


## [1.1.3] - 2024-08-18

### Fix
- Fixed aggregation issue when gene names are `NaN` or `None` (#101)
- Fix Xenium reader for old Xenium data format (#105)

### Added
- Support multipolygons in ROI rasterization
- Added bins aggregation
- Added Visium HD reader (tutorial comming soon)
- Added Visium HD reader (tutorial coming soon)

### Changed
- Import submodules in init (segmentation, io, utils)
- API simplification in progress (new API + tutorial comming soon)
- API simplification in progress (new API + tutorial coming soon)

## [1.1.2] - 2024-07-24

Expand Down
12 changes: 6 additions & 6 deletions docs/api/segmentation/aggregate.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
::: sopa.segmentation.aggregate.overlay_segmentation
::: sopa.segmentation.aggregation.overlay_segmentation

::: sopa.segmentation.aggregate.average_channels
::: sopa.segmentation.aggregation.average_channels

::: sopa.segmentation.aggregate._average_channels_aligned
::: sopa.segmentation.aggregation._average_channels_aligned

::: sopa.segmentation.aggregate.count_transcripts
::: sopa.segmentation.aggregation.count_transcripts

::: sopa.segmentation.aggregate._count_transcripts_aligned
::: sopa.segmentation.aggregation._count_transcripts_aligned

::: sopa.segmentation.aggregate.Aggregator
::: sopa.segmentation.aggregation.Aggregator
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "sopa"
version = "1.1.3"
version = "1.1.4"
description = "Spatial-omics pipeline and analysis"
documentation = "https://gustaveroussy.github.io/sopa"
homepage = "https://gustaveroussy.github.io/sopa"
Expand Down
1 change: 1 addition & 0 deletions sopa/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ class ROI:

class SopaFiles:
SOPA_CACHE_DIR = ".sopa_cache"
TRANSCRIPT_TEMP_DIR = "transcript_patches"
PATCHES_FILE_IMAGE = "patches_file_image"
PATCHES_DIRS_BAYSOR = "patches_file_baysor"
PATCHES_DIRS_COMSEG = "patches_file_comseg"
Expand Down
41 changes: 34 additions & 7 deletions sopa/_sdata.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,19 @@
from __future__ import annotations

import logging
from typing import Iterator
from pathlib import Path
from typing import Any, Iterator

import geopandas as gpd
import pandas as pd
import xarray as xr
from anndata import AnnData
from datatree import DataTree
from spatialdata import SpatialData
from spatialdata.models import SpatialElement
from spatialdata.transformations import Identity, get_transformation, set_transformation
from xarray import DataArray

from ._constants import SopaAttrs, SopaKeys
from ._constants import SopaAttrs, SopaFiles, SopaKeys

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -112,14 +113,14 @@ def get_intensities(sdata: SpatialData) -> pd.DataFrame | None:
return adata.to_df()


def iter_scales(image: DataTree) -> Iterator[xr.DataArray]:
def iter_scales(image: DataTree) -> Iterator[DataArray]:
"""Iterates through all the scales of a `DataTree`
Args:
image: a `DataTree`
Yields:
Each scale (as a `xr.DataArray`)
Each scale (as a `DataArray`)
"""
assert isinstance(image, DataTree), f"Multiscale iteration is reserved for type DataTree. Found {type(image)}"

Expand Down Expand Up @@ -154,22 +155,42 @@ def get_spatial_element(
if len(element_dict) == 1:
key = next(iter(element_dict.keys()))

assert valid_attr is None or element_dict[key].attrs.get(
assert valid_attr is None or _get_spatialdata_attrs(element_dict[key]).get(
valid_attr, True
), f"Element {key} is not valid for the attribute {valid_attr}."

return _return_element(element_dict, key, return_key, as_spatial_image)

assert valid_attr is not None, "Multiple elements found. Provide an element key."

keys = [key for key, element in element_dict.items() if element.attrs.get(valid_attr)]
keys = [key for key, element in element_dict.items() if _get_spatialdata_attrs(element).get(valid_attr)]

assert len(keys) > 0, f"No element with the attribute {valid_attr}. Provide an element key."
assert len(keys) == 1, f"Multiple valid elements found: {keys}. Provide an element key."

return _return_element(element_dict, keys[0], return_key, as_spatial_image)


def _get_spatialdata_attrs(element: SpatialElement) -> dict[str, Any]:
if isinstance(element, DataTree):
element = next(iter(element["scale0"].values()))
return element.attrs.get("spatialdata_attrs", {})


def _update_spatialdata_attrs(element: SpatialElement, attrs: dict):
if isinstance(element, DataTree):
for image_scale in iter_scales(element):
_update_spatialdata_attrs(image_scale, attrs)
return

old_attrs = element.uns if isinstance(element, AnnData) else element.attrs

if "spatialdata_attrs" not in old_attrs:
old_attrs["spatialdata_attrs"] = {}

old_attrs["spatialdata_attrs"].update(attrs)


def get_spatial_image(
sdata: SpatialData,
key: str | None = None,
Expand Down Expand Up @@ -205,3 +226,9 @@ def _return_element(
element = next(iter(element["scale0"].values()))

return (key, element) if return_key else element


def get_cache_dir(sdata: SpatialData) -> Path:
assert sdata.is_backed(), "SpatialData not saved on-disk. Save the object, or provide a cache directory."

return sdata.path / SopaFiles.SOPA_CACHE_DIR
2 changes: 1 addition & 1 deletion sopa/cli/patchify.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def _patchify_transcripts(
config or config_path is not None
), "Provide '--config-path', the path to a Baysor config file (toml) or comseg file (jsons)"

df_key = get_spatial_element(sdata.points)
df_key, _ = get_spatial_element(sdata.points, return_key=True)
patches = Patches2D(sdata, df_key, patch_width_microns, patch_overlap_microns)
valid_indices = patches.patchify_transcripts(
temp_dir,
Expand Down
12 changes: 9 additions & 3 deletions sopa/io/reader/visium_hd.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from spatialdata_io.readers.visium_hd import visium_hd as visium_hd_spatialdata_io

from ..._constants import SopaAttrs
from ..._sdata import _update_spatialdata_attrs
from ...utils import string_channel_names
from .utils import _default_image_kwargs

Expand Down Expand Up @@ -44,14 +45,19 @@ def visium_hd(

string_channel_names(sdata) # Ensure that channel names are strings

### Add Sopa attributes to detect the spatial elements
for key, image in sdata.images.items():
if key.endswith("_full_image"):
image.attrs[SopaAttrs.CELL_SEGMENTATION] = True
_update_spatialdata_attrs(image, {SopaAttrs.CELL_SEGMENTATION: True})
elif key.endswith("_hires_image"):
image.attrs[SopaAttrs.TISSUE_SEGMENTATION] = True
_update_spatialdata_attrs(image, {SopaAttrs.TISSUE_SEGMENTATION: True})

for key, geo_df in sdata.shapes.items():
if key.endswith("_002um"):
geo_df.attrs[SopaAttrs.BINS_AGGREGATION] = True
_update_spatialdata_attrs(geo_df, {SopaAttrs.BINS_AGGREGATION: True})

for key, table in sdata.tables.items():
if key.endswith("_002um"):
_update_spatialdata_attrs(table, {SopaAttrs.BINS_TABLE: True})

return sdata
3 changes: 2 additions & 1 deletion sopa/io/reader/xenium.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from spatialdata_io.readers.xenium import xenium as xenium_spatialdata_io

from ..._constants import SopaAttrs
from ..._sdata import _update_spatialdata_attrs
from ...utils import string_channel_names
from .utils import _default_image_kwargs

Expand Down Expand Up @@ -56,6 +57,6 @@ def xenium(

for key, image in sdata.images.items():
if key.startswith("morphology"):
image.attrs[SopaAttrs.CELL_SEGMENTATION] = True
_update_spatialdata_attrs(image, {SopaAttrs.CELL_SEGMENTATION: True})

return sdata
30 changes: 29 additions & 1 deletion sopa/patches/patches.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from .._constants import EPS, ROI, SopaFiles, SopaKeys
from .._sdata import (
get_boundaries,
get_cache_dir,
get_spatial_element,
get_spatial_image,
to_intrinsic,
Expand Down Expand Up @@ -435,6 +436,33 @@ def _assign_prior(series: dd.Series, unassigned_value: int | str | None) -> pd.S
if series.dtype == "int":
if unassigned_value is None or unassigned_value == 0:
return series
return series.replace(unassigned_value, 0)
return series.replace(int(unassigned_value), 0)

raise ValueError(f"Invalid dtype {series.dtype} for prior cell ids. Must be int or string.")


def make_image_patches(
sdata: SpatialData, patch_width: int = 2000, patch_overlap: int = 50, image_key: str | None = None
):
image_key, _ = get_spatial_image(sdata, key=image_key, return_key=True)
patches = Patches2D(sdata, image_key, patch_width=patch_width, patch_overlap=patch_overlap)

patches.write()


def make_transcript_patches(
sdata: SpatialData,
config: dict = {},
patch_width: int = 2000,
patch_overlap: int = 50,
points_key: str | None = None,
cache_dir: str | Path | None = None,
) -> list[int]:
points_key, _ = get_spatial_element(sdata, key=points_key, return_key=True)
patches = Patches2D(sdata, points_key, patch_width=patch_width, patch_overlap=patch_overlap)

cache_dir = Path(cache_dir or get_cache_dir(sdata)) / SopaFiles.TRANSCRIPT_TEMP_DIR

valid_indices = patches.patchify_transcripts(cache_dir, config=config)

return valid_indices
3 changes: 2 additions & 1 deletion sopa/utils/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def uniform(
include_image: bool = True,
apply_blur: bool = True,
as_output: bool = False,
transcript_cell_id_as_merscope: bool = False,
) -> SpatialData:
"""Generate a dummy dataset composed of cells generated uniformly in a square. It also has transcripts.
Expand Down Expand Up @@ -149,7 +150,7 @@ def uniform(
sdata = SpatialData(images=images, points=points, shapes=shapes)

_map_transcript_to_cell(sdata, "cell_id", sdata["transcripts"], sdata["cells"])
sdata["transcripts"]["cell_id"] = sdata["transcripts"]["cell_id"].astype(int)
sdata["transcripts"]["cell_id"] = sdata["transcripts"]["cell_id"].astype(int) - int(transcript_cell_id_as_merscope)

if as_output:
_add_table(sdata)
Expand Down
62 changes: 62 additions & 0 deletions workflow/config/toy/uniform_baysor_vizgen.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# For parameters details, see this commented example: https://github.com/gustaveroussy/sopa/blob/master/workflow/config/example_commented.yaml
read:
technology: uniform
kwargs:
transcript_cell_id_as_merscope: true

patchify:
patch_width_pixel: 1200
patch_overlap_pixel: 50
patch_width_microns: 400
patch_overlap_microns: 20

segmentation:
baysor:
min_area: 10
cell_key: "cell_id"
unassigned_value: -1

config:
data:
force_2d: true
min_molecules_per_cell: 10
x: "x"
y: "y"
z: "z"
gene: "genes"
min_molecules_per_gene: 0
min_molecules_per_segment: 3
confidence_nn_id: 6

segmentation:
scale: 3 # typical cell radius
scale_std: "25%" # cell radius standard deviation
prior_segmentation_confidence: 0
estimate_scale_from_centers: false
n_clusters: 4
iters: 500
n_cells_init: 0
nuclei_genes: ""
cyto_genes: ""
new_component_weight: 0.2
new_component_fraction: 0.3

aggregate:
average_intensities: true
min_transcripts: 5 # [optional] cells whose transcript count is below that this threshold are filtered

annotation:
method: fluorescence
args:
marker_cell_dict:
CK: Tumoral cell
CD3: T cell
CD20: B cell

explorer:
gene_column: "genes"
ram_threshold_gb: 4
pixel_size: 0.1

executables:
baysor: ~/.julia/bin/baysor

0 comments on commit f152ab1

Please sign in to comment.