Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Dev #108

Merged
merged 13 commits into from
Aug 18, 2024
15 changes: 15 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,18 @@
## [1.x.x] - 2024-xx-xx

### Fix
- Fixed aggregation issue when gene names are `NaN` or `None` (#101)
- Fix Xenium reader for old Xenium data format (#105)

### Added
- Support multipolygons in ROI rasterization
- Added bins aggregation
- Added Visium HD reader (tutorial comming soon)

### Changed
- Import submodules in init (segmentation, io, utils)
- API simplification in progress (new API + tutorial comming soon)

## [1.1.2] - 2024-07-24

### Fix
Expand Down
155 changes: 92 additions & 63 deletions docs/tutorials/api_usage.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/tutorials/comseg.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
"\n",
"First, follow the original [CLI tutorial](https://gustaveroussy.github.io/sopa/tutorials/cli_usage/) until you finished the \"Cellpose segmentation\" section, and then, continue below.\n",
"\n",
"#### 1. Save a ComSeg config file as a `config.json` file\n",
"#### 1. Save a ComSeg config file as a config.json file\n",
"\n",
"We display below a minimal example of a ComSeg `config.json` file\n",
"\n",
Expand Down
6 changes: 2 additions & 4 deletions docs/tutorials/xenium_explorer/explorer.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,6 @@
"outputs": [],
"source": [
"import sopa\n",
"import sopa.io\n",
"import sopa.segmentation\n",
"import spatialdata"
]
},
Expand Down Expand Up @@ -175,7 +173,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use the coordinates of a lasso selection in `SpatialData`\n",
"## Use the coordinates of a lasso selection in SpatialData\n",
"\n",
"On the Xenium Explorer, you can use the Lasso or Rectangular selection tools to select some regions of interest. Then, you'll be able to analyze back this region of interest using `spatialdata`.\n",
"\n",
Expand Down Expand Up @@ -252,7 +250,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Cropping a `SpatialData` object from a selection\n",
"### Cropping a SpatialData object from a selection\n",
"\n",
"You can also export the whole selection as a polygon and use it to crop the `spatialdata` object. For that, click on \"Download Selection Coordinates as CSV\", as below. It will create a file called `\"Selection_1_coordinates.csv\"`.\n",
"\n",
Expand Down
4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "sopa"
version = "1.1.2"
version = "1.1.3"
description = "Spatial-omics pipeline and analysis"
documentation = "https://gustaveroussy.github.io/sopa"
homepage = "https://gustaveroussy.github.io/sopa"
Expand Down Expand Up @@ -76,7 +76,7 @@ testpaths = ["tests"]
python_files = "test_*.py"

[tool.black]
line-length = 100
line-length = 120
include = '\.pyi?$'
exclude = '''
/(
Expand Down
9 changes: 9 additions & 0 deletions sopa/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,17 @@
import importlib.metadata
import logging
import sys
from ._logging import configure_logger

__version__ = importlib.metadata.version("sopa")

log = logging.getLogger("sopa")
configure_logger(log)

if "--help" not in sys.argv:
from . import utils
from . import io
from . import segmentation

from .segmentation import tissue_segmentation
from ._sdata import get_spatial_image, get_spatial_element, to_intrinsic
9 changes: 9 additions & 0 deletions sopa/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ class SopaKeys:
GEOMETRY_COUNT = "n_components"


class SopaAttrs:
CELL_SEGMENTATION = "cell_segmentation_image"
TISSUE_SEGMENTATION = "tissue_segmentation_image"
BINS_AGGREGATION = "bins_aggregation_shapes"
BINS_TABLE = "bins_table"
TRANSCRIPTS = "transcripts_dataframe"
GENE_COLUMN = "feature_key"


VALID_DIMENSIONS = ("c", "y", "x")
LOW_AVERAGE_COUNT = 0.01
EPS = 1e-5
Expand Down
122 changes: 70 additions & 52 deletions sopa/_sdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from spatialdata.transformations import Identity, get_transformation, set_transformation
from xarray import DataArray

from ._constants import SopaKeys
from ._constants import SopaAttrs, SopaKeys

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -40,9 +40,7 @@ def get_boundaries(
if res is not None:
return res

error_message = (
"sdata object has no valid segmentation boundary. Consider running Sopa segmentation first."
)
error_message = "sdata object has no valid segmentation boundary. Consider running Sopa segmentation first."

if not warn:
raise ValueError(error_message)
Expand All @@ -51,17 +49,13 @@ def get_boundaries(
return (None, None) if return_key else None


def _try_get_boundaries(
sdata: SpatialData, shapes_key: str, return_key: bool
) -> gpd.GeoDataFrame | None:
def _try_get_boundaries(sdata: SpatialData, shapes_key: str, return_key: bool) -> gpd.GeoDataFrame | None:
"""Try to get a cell boundaries for a given `shapes_key`"""
if shapes_key in sdata.shapes:
return (shapes_key, sdata[shapes_key]) if return_key else sdata[shapes_key]


def get_intrinsic_cs(
sdata: SpatialData, element: SpatialElement | str, name: str | None = None
) -> str:
def get_intrinsic_cs(sdata: SpatialData, element: SpatialElement | str, name: str | None = None) -> str:
"""Gets the name of the intrinsic coordinate system of an element

Args:
Expand All @@ -86,9 +80,7 @@ def get_intrinsic_cs(
return name


def to_intrinsic(
sdata: SpatialData, element: SpatialElement | str, element_cs: SpatialElement | str
) -> SpatialElement:
def to_intrinsic(sdata: SpatialData, element: SpatialElement | str, element_cs: SpatialElement | str) -> SpatialElement:
"""Transforms a `SpatialElement` into the intrinsic coordinate system of another `SpatialElement`

Args:
Expand All @@ -105,32 +97,6 @@ def to_intrinsic(
return sdata.transform_element_to_coordinate_system(element, cs)


def get_key(sdata: SpatialData, attr: str, key: str | None = None):
if key is not None:
return key

elements = getattr(sdata, attr)

if not len(elements):
return None

assert (
len(elements) == 1
), f"Trying to get an element key of `sdata.{attr}`, but it contains multiple values and no dict key was provided"

return next(iter(elements.keys()))


def get_element(sdata: SpatialData, attr: str, key: str | None = None):
key = get_key(sdata, attr, key)
return sdata[key] if key is not None else None


def get_item(sdata: SpatialData, attr: str, key: str | None = None):
key = get_key(sdata, attr, key)
return key, sdata[key] if key is not None else None


def get_intensities(sdata: SpatialData) -> pd.DataFrame | None:
"""Gets the intensity dataframe of shape `n_obs x n_channels`"""
assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"
Expand All @@ -155,35 +121,87 @@ def iter_scales(image: DataTree) -> Iterator[xr.DataArray]:
Yields:
Each scale (as a `xr.DataArray`)
"""
assert isinstance(
image, DataTree
), f"Multiscale iteration is reserved for type DataTree. Found {type(image)}"
assert isinstance(image, DataTree), f"Multiscale iteration is reserved for type DataTree. Found {type(image)}"

for scale in image:
yield next(iter(image[scale].values()))


def get_spatial_element(
element_dict: dict[str, SpatialElement],
key: str | None = None,
valid_attr: str | None = None,
return_key: bool = False,
as_spatial_image: bool = False,
) -> SpatialElement | tuple[str, SpatialElement]:
"""Gets an element from a SpatialData object.

Args:
sdata: SpatialData object.
key: Optional element key. If `None`, returns the only element (if only one), or tries to find an element with `valid_attr`.
return_key: Whether to also return the key of the element.
valid_attr: Attribute that the element must have to be considered valid.
as_spatial_image: Whether to return the element as a `SpatialImage` (if it is a `DataTree`)

Returns:
If `return_key` is False, only the element is returned, else a tuple `(element_key, element)`
"""
assert len(element_dict), "No spatial element was found in the dict."

if key is not None:
return _return_element(element_dict, key, return_key, as_spatial_image)

if len(element_dict) == 1:
key = next(iter(element_dict.keys()))

assert valid_attr is None or element_dict[key].attrs.get(
valid_attr, True
), f"Element {key} is not valid for the attribute {valid_attr}."

return _return_element(element_dict, key, return_key, as_spatial_image)

assert valid_attr is not None, "Multiple elements found. Provide an element key."

keys = [key for key, element in element_dict.items() if element.attrs.get(valid_attr)]

assert len(keys) > 0, f"No element with the attribute {valid_attr}. Provide an element key."
assert len(keys) == 1, f"Multiple valid elements found: {keys}. Provide an element key."

return _return_element(element_dict, keys[0], return_key, as_spatial_image)


def get_spatial_image(
sdata: SpatialData, key: str | None = None, return_key: bool = False
sdata: SpatialData,
key: str | None = None,
return_key: bool = False,
valid_attr: str = SopaAttrs.CELL_SEGMENTATION,
) -> DataArray | tuple[str, DataArray]:
"""Gets a DataArray from a SpatialData object (if the image has multiple scale, the `scale0` is returned)

Args:
sdata: SpatialData object.
key: Optional image key. If `None`, returns the only image (if only one), or raises an error.
key: Optional image key. If `None`, returns the only image (if only one), or tries to find an image with `valid_attr`.
return_key: Whether to also return the key of the image.
valid_attr: Attribute that the image must have to be considered valid.

Returns:
If `return_key` is False, only the image is returned, else a tuple `(image_key, image)`
"""
key = get_key(sdata, "images", key)
return get_spatial_element(
sdata.images,
key=key,
valid_attr=valid_attr,
return_key=return_key,
as_spatial_image=True,
)


assert key is not None, "One image in `sdata.images` is required"
def _return_element(
element_dict: dict[str, SpatialElement], key: str, return_key: bool, as_spatial_image: bool
) -> SpatialElement | tuple[str, SpatialElement]:
element = element_dict[key]

image = sdata.images[key]
if isinstance(image, DataTree):
image = next(iter(image["scale0"].values()))
if as_spatial_image and isinstance(element, DataTree):
element = next(iter(element["scale0"].values()))

if return_key:
return key, image
return image
return (key, element) if return_key else element
11 changes: 3 additions & 8 deletions sopa/annotation/tangram/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,7 @@ def init_obsm(self, level: int):
)

def get_hard_labels(self, df: pd.DataFrame) -> pd.Series:
df = df.clip(
df.quantile(1 - self.clip_percentile), df.quantile(self.clip_percentile), axis=1
)
df = df.clip(df.quantile(1 - self.clip_percentile), df.quantile(self.clip_percentile), axis=1)
df = (df - df.min()) / (df.max() - df.min())
return df.idxmax(1)

Expand All @@ -138,9 +136,7 @@ def pp_adata(self, ad_sp_: AnnData, ad_sc_: AnnData, split: np.ndarray) -> AnnDa
sc.pp.filter_genes(ad_sp_split, min_cells=1)

# Calculate uniform density prior as 1/number_of_spots
ad_sp_split.obs["uniform_density"] = (
np.ones(ad_sp_split.X.shape[0]) / ad_sp_split.X.shape[0]
)
ad_sp_split.obs["uniform_density"] = np.ones(ad_sp_split.X.shape[0]) / ad_sp_split.X.shape[0]

# Calculate rna_count_based density prior as % of rna molecule count
rna_count_per_spot = np.array(ad_sp_split.X.sum(axis=1)).squeeze()
Expand All @@ -157,8 +153,7 @@ def pp_adata(self, ad_sp_: AnnData, ad_sc_: AnnData, split: np.ndarray) -> AnnDa
)

selection = list(
set(ad_sp_split.var_names[ad_sp_split.var.counts > 0])
& set(ad_sc_.var_names[ad_sc_.var.counts > 0])
set(ad_sp_split.var_names[ad_sp_split.var.counts > 0]) & set(ad_sc_.var_names[ad_sc_.var.counts > 0])
)

assert len(
Expand Down
8 changes: 2 additions & 6 deletions sopa/cli/annotate.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,7 @@
def fluorescence(
sdata_path: str = typer.Argument(help=SDATA_HELPER),
marker_cell_dict: str = typer.Option(callback=ast.literal_eval),
cell_type_key: str = typer.Option(
"cell_type", help="Key added in `adata.obs` corresponding to the cell type"
),
cell_type_key: str = typer.Option("cell_type", help="Key added in `adata.obs` corresponding to the cell type"),
):
"""Simple annotation based on fluorescence, where each provided channel corresponds to one cell type.

Expand All @@ -39,9 +37,7 @@ def fluorescence(
@app_annotate.command()
def tangram(
sdata_path: str = typer.Argument(help=SDATA_HELPER),
sc_reference_path: str = typer.Option(
help="Path to the scRNAseq annotated reference, as a `.h5ad` file"
),
sc_reference_path: str = typer.Option(help="Path to the scRNAseq annotated reference, as a `.h5ad` file"),
cell_type_key: str = typer.Option(help="Key of `adata_ref.obs` containing the cell-types"),
reference_preprocessing: str = typer.Option(
None,
Expand Down
Loading
Loading