Merge pull request #17 from openproblems-bio/dev

LouisK92 · web-flow · commit 85284ec8c78c · 2025-03-12T18:53:33.000+01:00
Dev
diff --git a/common b/common
@@ -1 +1 @@
-Subproject commit 65e05af68a11ee87853fcf7a3c6b579001f21abe
+Subproject commit 80321bf1b5e44330c5ceadfc1434908bb58e2aff
diff --git a/src/methods_cell_type_annotation/moscot/config.vsh.yaml b/src/methods_cell_type_annotation/moscot/config.vsh.yaml
@@ -0,0 +1,60 @@
+__merge__: /src/api/comp_method_cell_type_annotation.yaml
+
+name: moscot
+label: "MOSCOT"
+summary: "Mapping of annotations from single-cell to spatial using moscot"
+description: "Mapping of annotations from single-cell to spatial using moscot"
+links:
+  documentation: "https://moscot.readthedocs.io"
+  repository: "https://github.com/theislab/moscot"
+references:
+  doi: "10.1038/s41586-024-08453-2"
+
+arguments:
+# TODO: evaluate alpha over the range [0.7, 0.8, 0.9].
+# TODO: evaluate tau_a = tau_b over the range [0.1, 0.2, 0.3] (so far it seems to work only with tau=1 on our data).
+# TODO: rank should depend on data set size: rank=5000 for 300k cells, down to a minimum of rank=500 (so far it seems to work only with rank=-1 on our data).
+  - name: --alpha  # forwarded by script.py to mp.solve(alpha=...)
+    required: false
+    direction: input
+    type: double
+    default: 0.8
+  - name: --epsilon  # forwarded by script.py to mp.solve(epsilon=...)
+    required: false
+    direction: input
+    type: double
+    default: 0.01
+  - name: --tau  # script.py passes this single value as both tau_a and tau_b to mp.solve
+    required: false
+    direction: input
+    type: double
+    default: 1.0
+  - name: --rank  # forwarded by script.py to mp.solve(rank=...)
+    required: false
+    direction: input
+    type: integer
+    default: -1
+  - name: --mapping_mode  # forwarded by script.py to mp.annotation_mapping(mapping_mode=...)
+    required: false
+    direction: input
+    type: string
+    choices: ["sum", "max"]
+    default: "max"
+
+resources:
+  - type: python_script
+    path: script.py
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+    setup:
+      - type: python
+        pypi: [numpy, anndata, scanpy, moscot, flax, diffrax]  # NOTE(review): flax and diffrax are not imported by script.py; presumably runtime dependencies of moscot — confirm
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, midcpu, midmem ]
diff --git a/src/methods_cell_type_annotation/moscot/script.py b/src/methods_cell_type_annotation/moscot/script.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python3
+
+import numpy as np
+import anndata as ad
+import scanpy as sc
+
+import moscot as mt
+from moscot.problems.space import MappingProblem
+
+## VIASH START
+ par = {
+   'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+   'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+   'output': 'spatial_with_celltypes.h5ad',
+   'celltype_key': 'cell_type',
+   'alpha': 0.8,
+   'epsilon': 0.01,
+   'tau': 1.0,
+   'rank': -1,
+   'mapping_mode': 'max',
+ }
+ meta = {
+   'name': 'moscot',
+ }
+ ## VIASH END
+
+# Optional parameter check: For this specific annotation method the par['input_spatial_normalized_counts'] and par['input_scrnaseq_reference'] are required
+assert par['input_spatial_normalized_counts'] is not None, 'Spatial input is required for this annotation method.'
+assert par['input_scrnaseq_reference'] is not None, 'Single cell input is required for this annotation method.'
+
+# Read input
+adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+
+# Check for normalized layer and centroid information
+assert "normalized" in adata_sc.layers.keys(), 'Layer "normalized" is required for single-cell anndata'
+assert "normalized" in adata_sp.layers.keys(), 'Layer "normalized" is required for spatial anndata'
+assert "centroid_x" in adata_sp.obs and "centroid_y" in adata_sp.obs, '"Observation level columns "centroid_x" and "centroid_y" are required for spatial anndata'
+
+# Use normalized layer and create spatial obsm
+adata_sc.X = adata_sc.layers["normalized"]
+adata_sp.X = adata_sp.layers["normalized"]
+adata_sp.obsm["spatial"] = adata_sp.obs[["centroid_x", "centroid_y"]].to_numpy()
+
+# Define mapping problem
+mp = MappingProblem(adata_sc=adata_sc, adata_sp=adata_sp)
+
+mp = mp.prepare(
+    sc_attr={"attr": "layers", "key": "normalized"},
+    xy_callback="local-pca",
+)
+
+mp = mp.solve(
+    alpha=par['alpha'],
+    epsilon=par['epsilon'],
+    tau_a=par['tau'],
+    tau_b=par['tau'],
+    rank=par['rank'],
+)
+
+# Map annotations
+anno_map_max = mp.annotation_mapping(
+    mapping_mode=par['mapping_mode'],
+    annotation_label=par['celltype_key'],
+    source="src",
+    forward=False,
+)
+adata_sp.obs[par['celltype_key']] = anno_map_max[par['celltype_key']].values
+
+# Write output
+adata_sp.write_h5ad(par['output'])
diff --git a/src/methods_cell_type_annotation/tacco/config.vsh.yaml b/src/methods_cell_type_annotation/tacco/config.vsh.yaml
@@ -0,0 +1,29 @@
+__merge__: /src/api/comp_method_cell_type_annotation.yaml  # NOTE(review): presumably supplies the inputs/output/celltype_key arguments that script.py reads but this file does not declare — confirm
+
+name: tacco
+label: "Tacco"
+summary: "Annotate cell types using Tacco"
+description: "Annotate cell types using Tacco"
+links:
+  documentation: "https://simonwm.github.io/tacco/"
+  repository: "https://github.com/simonwm/tacco"
+references:
+  doi: "10.1038/s41587-023-01657-3"
+
+resources:
+  - type: python_script
+    path: script.py  # reads both h5ad inputs, runs tacco.tl.annotate and writes the annotated spatial h5ad
+
+engines:
+  - type: docker
+    image: openproblems/base_python:1.0.0
+    setup:
+      - type: python
+        pypi: [anndata, numpy, tacco]  # the three packages imported by script.py
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ midtime, midcpu, midmem ]
diff --git a/src/methods_cell_type_annotation/tacco/script.py b/src/methods_cell_type_annotation/tacco/script.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+import anndata as ad
+import numpy as np
+import tacco
+
+## VIASH START
+par = {  # placeholder parameters pointing at the test resources, for running the script directly
+  'input_spatial_normalized_counts': 'resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_normalized_counts.h5ad',
+  'input_scrnaseq_reference': 'resources_test/task_ist_preprocessing/mouse_brain_combined/scrnaseq_reference.h5ad',
+  'output': 'spatial_with_celltypes.h5ad',
+  'celltype_key': 'cell_type',
+}
+meta = {  # NOTE(review): meta is not read anywhere in this script — confirm it is only needed by the Viash tooling
+  'name': 'tacco',
+}
+## VIASH END
+
+# Optional parameter check: For this specific annotation method the par['input_spatial_normalized_counts'] and par['input_scrnaseq_reference'] are required
+assert par['input_spatial_normalized_counts'] is not None, 'Spatial input is required for this annotation method.'
+assert par['input_scrnaseq_reference'] is not None, 'Single cell input is required for this annotation method.'
+
+# Read input
+adata_sp = ad.read_h5ad(par['input_spatial_normalized_counts'])
+adata_sc = ad.read_h5ad(par['input_scrnaseq_reference'])
+
+# Switch to raw counts
+adata_sp.X = adata_sp.layers['counts']
+adata_sc.X = adata_sc.layers['counts']
+
+# Run tacco
+cell_type_assignment = tacco.tl.annotate(
+    adata=adata_sp,
+    reference=adata_sc,
+    annotation_key=par['celltype_key']
+)
+
+# Tacco stores the cell type proportions in a n_obs x n_celltypes matrix, so we have to extract the celltype with highest consensus
+cell_types = cell_type_assignment.columns
+highest_score_idx = np.argmax(cell_type_assignment, axis=1)
+adata_sp.obs[par['celltype_key']] = cell_types[highest_score_idx]
+
+# Write output
+adata_sp.write_h5ad(par['output'])