gustaveroussy · quentinblampey · Aug 18, 2024 · Aug 2, 2024 · Aug 2, 2024 · Aug 3, 2024
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,18 @@
+## [1.x.x] - 2024-xx-xx
+
+### Fix
+- Fixed aggregation issue when gene names are `NaN` or `None` (#101)
+- Fix Xenium reader for old Xenium data format (#105)
+
+### Added
+- Support multipolygons in ROI rasterization
+- Added bins aggregation
+- Added Visium HD reader (tutorial comming soon)
+
+### Changed
+- Import submodules in init (segmentation, io, utils)
+- API simplification in progress (new API + tutorial comming soon)
+
 ## [1.1.2] - 2024-07-24
 
 ### Fix

diff --git a/docs/tutorials/api_usage.ipynb b/docs/tutorials/api_usage.ipynb
diff --git a/docs/tutorials/comseg.ipynb b/docs/tutorials/comseg.ipynb
@@ -35,7 +35,7 @@
     "\n",
     "First, follow the original [CLI tutorial](https://gustaveroussy.github.io/sopa/tutorials/cli_usage/) until you finished the \"Cellpose segmentation\" section, and then, continue below.\n",
     "\n",
-    "####  1. Save a ComSeg config file as a `config.json` file\n",
+    "####  1. Save a ComSeg config file as a config.json file\n",
     "\n",
     "We display below a minimal example of a ComSeg `config.json` file\n",
     "\n",

diff --git a/docs/tutorials/xenium_explorer/explorer.ipynb b/docs/tutorials/xenium_explorer/explorer.ipynb
@@ -18,8 +18,6 @@
    "outputs": [],
    "source": [
     "import sopa\n",
-    "import sopa.io\n",
-    "import sopa.segmentation\n",
     "import spatialdata"
    ]
   },
@@ -175,7 +173,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Use the coordinates of a lasso selection in `SpatialData`\n",
+    "## Use the coordinates of a lasso selection in SpatialData\n",
     "\n",
     "On the Xenium Explorer, you can use the Lasso or Rectangular selection tools to select some regions of interest. Then, you'll be able to analyze back this region of interest using `spatialdata`.\n",
     "\n",
@@ -252,7 +250,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Cropping a `SpatialData` object from a selection\n",
+    "### Cropping a SpatialData object from a selection\n",
     "\n",
     "You can also export the whole selection as a polygon and use it to crop the `spatialdata` object. For that, click on \"Download Selection Coordinates as CSV\", as below. It will create a file called `\"Selection_1_coordinates.csv\"`.\n",
     "\n",

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "sopa"
-version = "1.1.2"
+version = "1.1.3"
 description = "Spatial-omics pipeline and analysis"
 documentation = "https://gustaveroussy.github.io/sopa"
 homepage = "https://gustaveroussy.github.io/sopa"
@@ -76,7 +76,7 @@ testpaths = ["tests"]
 python_files = "test_*.py"
 
 [tool.black]
-line-length = 100
+line-length = 120
 include = '\.pyi?$'
 exclude = '''
 /(

diff --git a/sopa/__init__.py b/sopa/__init__.py
@@ -1,8 +1,17 @@
 import importlib.metadata
 import logging
+import sys
 from ._logging import configure_logger
 
 __version__ = importlib.metadata.version("sopa")
 
 log = logging.getLogger("sopa")
 configure_logger(log)
+
+if "--help" not in sys.argv:
+    from . import utils
+    from . import io
+    from . import segmentation
+
+    from .segmentation import tissue_segmentation
+    from ._sdata import get_spatial_image, get_spatial_element, to_intrinsic
diff --git a/sopa/_constants.py b/sopa/_constants.py
@@ -33,6 +33,15 @@ class SopaKeys:
     GEOMETRY_COUNT = "n_components"
 
 
+class SopaAttrs:
+    CELL_SEGMENTATION = "cell_segmentation_image"
+    TISSUE_SEGMENTATION = "tissue_segmentation_image"
+    BINS_AGGREGATION = "bins_aggregation_shapes"
+    BINS_TABLE = "bins_table"
+    TRANSCRIPTS = "transcripts_dataframe"
+    GENE_COLUMN = "feature_key"
+
+
 VALID_DIMENSIONS = ("c", "y", "x")
 LOW_AVERAGE_COUNT = 0.01
 EPS = 1e-5

diff --git a/sopa/_sdata.py b/sopa/_sdata.py
@@ -12,7 +12,7 @@
 from spatialdata.transformations import Identity, get_transformation, set_transformation
 from xarray import DataArray
 
-from ._constants import SopaKeys
+from ._constants import SopaAttrs, SopaKeys
 
 log = logging.getLogger(__name__)
 
@@ -40,9 +40,7 @@ def get_boundaries(
         if res is not None:
             return res
 
-    error_message = (
-        "sdata object has no valid segmentation boundary. Consider running Sopa segmentation first."
-    )
+    error_message = "sdata object has no valid segmentation boundary. Consider running Sopa segmentation first."
 
     if not warn:
         raise ValueError(error_message)
@@ -51,17 +49,13 @@ def get_boundaries(
     return (None, None) if return_key else None
 
 
-def _try_get_boundaries(
-    sdata: SpatialData, shapes_key: str, return_key: bool
-) -> gpd.GeoDataFrame | None:
+def _try_get_boundaries(sdata: SpatialData, shapes_key: str, return_key: bool) -> gpd.GeoDataFrame | None:
     """Try to get a cell boundaries for a given `shapes_key`"""
     if shapes_key in sdata.shapes:
         return (shapes_key, sdata[shapes_key]) if return_key else sdata[shapes_key]
 
 
-def get_intrinsic_cs(
-    sdata: SpatialData, element: SpatialElement | str, name: str | None = None
-) -> str:
+def get_intrinsic_cs(sdata: SpatialData, element: SpatialElement | str, name: str | None = None) -> str:
     """Gets the name of the intrinsic coordinate system of an element
 
     Args:
@@ -86,9 +80,7 @@ def get_intrinsic_cs(
     return name
 
 
-def to_intrinsic(
-    sdata: SpatialData, element: SpatialElement | str, element_cs: SpatialElement | str
-) -> SpatialElement:
+def to_intrinsic(sdata: SpatialData, element: SpatialElement | str, element_cs: SpatialElement | str) -> SpatialElement:
     """Transforms a `SpatialElement` into the intrinsic coordinate system of another `SpatialElement`
 
     Args:
@@ -105,32 +97,6 @@ def to_intrinsic(
     return sdata.transform_element_to_coordinate_system(element, cs)
 
 
-def get_key(sdata: SpatialData, attr: str, key: str | None = None):
-    if key is not None:
-        return key
-
-    elements = getattr(sdata, attr)
-
-    if not len(elements):
-        return None
-
-    assert (
-        len(elements) == 1
-    ), f"Trying to get an element key of `sdata.{attr}`, but it contains multiple values and no dict key was provided"
-
-    return next(iter(elements.keys()))
-
-
-def get_element(sdata: SpatialData, attr: str, key: str | None = None):
-    key = get_key(sdata, attr, key)
-    return sdata[key] if key is not None else None
-
-
-def get_item(sdata: SpatialData, attr: str, key: str | None = None):
-    key = get_key(sdata, attr, key)
-    return key, sdata[key] if key is not None else None
-
-
 def get_intensities(sdata: SpatialData) -> pd.DataFrame | None:
     """Gets the intensity dataframe of shape `n_obs x n_channels`"""
     assert SopaKeys.TABLE in sdata.tables, f"No '{SopaKeys.TABLE}' found in sdata.tables"
@@ -155,35 +121,87 @@ def iter_scales(image: DataTree) -> Iterator[xr.DataArray]:
     Yields:
         Each scale (as a `xr.DataArray`)
     """
-    assert isinstance(
-        image, DataTree
-    ), f"Multiscale iteration is reserved for type DataTree. Found {type(image)}"
+    assert isinstance(image, DataTree), f"Multiscale iteration is reserved for type DataTree. Found {type(image)}"
 
     for scale in image:
         yield next(iter(image[scale].values()))
 
 
+def get_spatial_element(
+    element_dict: dict[str, SpatialElement],
+    key: str | None = None,
+    valid_attr: str | None = None,
+    return_key: bool = False,
+    as_spatial_image: bool = False,
+) -> SpatialElement | tuple[str, SpatialElement]:
+    """Gets an element from a SpatialData object.
+
+    Args:
+        sdata: SpatialData object.
+        key: Optional element key. If `None`, returns the only element (if only one), or tries to find an element with `valid_attr`.
+        return_key: Whether to also return the key of the element.
+        valid_attr: Attribute that the element must have to be considered valid.
+        as_spatial_image: Whether to return the element as a `SpatialImage` (if it is a `DataTree`)
+
+    Returns:
+        If `return_key` is False, only the element is returned, else a tuple `(element_key, element)`
+    """
+    assert len(element_dict), "No spatial element was found in the dict."
+
+    if key is not None:
+        return _return_element(element_dict, key, return_key, as_spatial_image)
+
+    if len(element_dict) == 1:
+        key = next(iter(element_dict.keys()))
+
+        assert valid_attr is None or element_dict[key].attrs.get(
+            valid_attr, True
+        ), f"Element {key} is not valid for the attribute {valid_attr}."
+
+        return _return_element(element_dict, key, return_key, as_spatial_image)
+
+    assert valid_attr is not None, "Multiple elements found. Provide an element key."
+
+    keys = [key for key, element in element_dict.items() if element.attrs.get(valid_attr)]
+
+    assert len(keys) > 0, f"No element with the attribute {valid_attr}. Provide an element key."
+    assert len(keys) == 1, f"Multiple valid elements found: {keys}. Provide an element key."
+
+    return _return_element(element_dict, keys[0], return_key, as_spatial_image)
+
+
 def get_spatial_image(
-    sdata: SpatialData, key: str | None = None, return_key: bool = False
+    sdata: SpatialData,
+    key: str | None = None,
+    return_key: bool = False,
+    valid_attr: str = SopaAttrs.CELL_SEGMENTATION,
 ) -> DataArray | tuple[str, DataArray]:
     """Gets a DataArray from a SpatialData object (if the image has multiple scale, the `scale0` is returned)
 
     Args:
         sdata: SpatialData object.
-        key: Optional image key. If `None`, returns the only image (if only one), or raises an error.
+        key: Optional image key. If `None`, returns the only image (if only one), or tries to find an image with `valid_attr`.
         return_key: Whether to also return the key of the image.
+        valid_attr: Attribute that the image must have to be considered valid.
 
     Returns:
         If `return_key` is False, only the image is returned, else a tuple `(image_key, image)`
     """
-    key = get_key(sdata, "images", key)
+    return get_spatial_element(
+        sdata.images,
+        key=key,
+        valid_attr=valid_attr,
+        return_key=return_key,
+        as_spatial_image=True,
+    )
+
 
-    assert key is not None, "One image in `sdata.images` is required"
+def _return_element(
+    element_dict: dict[str, SpatialElement], key: str, return_key: bool, as_spatial_image: bool
+) -> SpatialElement | tuple[str, SpatialElement]:
+    element = element_dict[key]
 
-    image = sdata.images[key]
-    if isinstance(image, DataTree):
-        image = next(iter(image["scale0"].values()))
+    if as_spatial_image and isinstance(element, DataTree):
+        element = next(iter(element["scale0"].values()))
 
-    if return_key:
-        return key, image
-    return image
+    return (key, element) if return_key else element
diff --git a/sopa/annotation/tangram/run.py b/sopa/annotation/tangram/run.py
@@ -124,9 +124,7 @@ def init_obsm(self, level: int):
         )
 
     def get_hard_labels(self, df: pd.DataFrame) -> pd.Series:
-        df = df.clip(
-            df.quantile(1 - self.clip_percentile), df.quantile(self.clip_percentile), axis=1
-        )
+        df = df.clip(df.quantile(1 - self.clip_percentile), df.quantile(self.clip_percentile), axis=1)
         df = (df - df.min()) / (df.max() - df.min())
         return df.idxmax(1)
 
@@ -138,9 +136,7 @@ def pp_adata(self, ad_sp_: AnnData, ad_sc_: AnnData, split: np.ndarray) -> AnnDa
         sc.pp.filter_genes(ad_sp_split, min_cells=1)
 
         # Calculate uniform density prior as 1/number_of_spots
-        ad_sp_split.obs["uniform_density"] = (
-            np.ones(ad_sp_split.X.shape[0]) / ad_sp_split.X.shape[0]
-        )
+        ad_sp_split.obs["uniform_density"] = np.ones(ad_sp_split.X.shape[0]) / ad_sp_split.X.shape[0]
 
         # Calculate rna_count_based density prior as % of rna molecule count
         rna_count_per_spot = np.array(ad_sp_split.X.sum(axis=1)).squeeze()
@@ -157,8 +153,7 @@ def pp_adata(self, ad_sp_: AnnData, ad_sc_: AnnData, split: np.ndarray) -> AnnDa
         )
 
         selection = list(
-            set(ad_sp_split.var_names[ad_sp_split.var.counts > 0])
-            & set(ad_sc_.var_names[ad_sc_.var.counts > 0])
+            set(ad_sp_split.var_names[ad_sp_split.var.counts > 0]) & set(ad_sc_.var_names[ad_sc_.var.counts > 0])
         )
 
         assert len(

diff --git a/sopa/cli/annotate.py b/sopa/cli/annotate.py
@@ -13,9 +13,7 @@
 def fluorescence(
     sdata_path: str = typer.Argument(help=SDATA_HELPER),
     marker_cell_dict: str = typer.Option(callback=ast.literal_eval),
-    cell_type_key: str = typer.Option(
-        "cell_type", help="Key added in `adata.obs` corresponding to the cell type"
-    ),
+    cell_type_key: str = typer.Option("cell_type", help="Key added in `adata.obs` corresponding to the cell type"),
 ):
     """Simple annotation based on fluorescence, where each provided channel corresponds to one cell type.
 
@@ -39,9 +37,7 @@ def fluorescence(
 @app_annotate.command()
 def tangram(
     sdata_path: str = typer.Argument(help=SDATA_HELPER),
-    sc_reference_path: str = typer.Option(
-        help="Path to the scRNAseq annotated reference, as a `.h5ad` file"
-    ),
+    sc_reference_path: str = typer.Option(help="Path to the scRNAseq annotated reference, as a `.h5ad` file"),
     cell_type_key: str = typer.Option(help="Key of `adata_ref.obs` containing the cell-types"),
     reference_preprocessing: str = typer.Option(
         None,