diff --git a/conf/modules.config b/conf/modules.config
index 81395a1d..8a53c285 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -32,25 +32,28 @@ process {
     }
 
     if (!params.skip_emptydrops) {
-        withName: EMPTYDROPS_CELL_CALLING {
+        withName: 'CELLBENDER_REMOVEBACKGROUND' {
             publishDir = [
-                path: { "${params.outdir}/${params.aligner}" },
-                mode: params.publish_dir_mode,
-                saveAs: { filename ->
-                    if ( params.aligner == 'cellranger' ) "count/${meta.id}/${filename}"
-                    else if ( params.aligner == 'kallisto' ) "${meta.id}.count/${filename}"
-                    else "${meta.id}/${filename}"
-                }
+                path: { "${params.outdir}/${params.aligner}/${meta.id}/emptydrops_filter" },
+                mode: params.publish_dir_mode
+            ]
+        }
+        withName: 'ADATA_BARCODES' {
+            ext.prefix = { "${meta.id}_custom_emptydrops_filter_matrix" }
+            publishDir = [
+                path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" },
+                mode: params.publish_dir_mode
             ]
         }
     }
 
-    withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' {
+    withName: 'MTX_TO_H5AD.*|CONCAT_H5AD|ANNDATAR_CONVERT' { // selectors are regular expressions, not globs
         publishDir = [
             path: { "${params.outdir}/${params.aligner}/mtx_conversions" },
             mode: params.publish_dir_mode
         ]
     }
+
     withName: 'GTF_GENE_FILTER' {
         publishDir = [
             path: { "${params.outdir}/gtf_filter" },
diff --git a/modules.json b/modules.json
index aa186d98..b9680cfe 100644
--- a/modules.json
+++ b/modules.json
@@ -5,6 +5,11 @@
         "https://github.com/nf-core/modules.git": {
             "modules": {
                 "nf-core": {
+                    "cellbender/removebackground": {
+                        "branch": "master",
+                        "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48",
+                        "installed_by": ["modules"]
+                    },
                     "cellranger/count": {
                         "branch": "master",
                         "git_sha": "90dad5491658049282ceb287a3d7732c1ce39837",
diff --git a/modules/local/adata_barcodes.nf b/modules/local/adata_barcodes.nf
new file mode 100644
index 00000000..2aef8ec9
--- /dev/null
+++ b/modules/local/adata_barcodes.nf
@@ -0,0 +1,23 @@
+process ADATA_BARCODES {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/anndata:0.10.7--e9840a94592528c8':
+        'community.wave.seqera.io/library/anndata:0.10.7--336c6c1921a0632b' }"
+
+    input:
+    tuple val(meta), path(h5ad), path(barcodes_csv)
+
+    output:
+    tuple val(meta), path("*.h5ad"), emit: h5ad
+    path "versions.yml"            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    template 'barcodes.py'
+}
diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf
new file mode 100644
index 00000000..dfe5fce9
--- /dev/null
+++ b/modules/local/anndatar_convert.nf
@@ -0,0 +1,24 @@
+process ANNDATAR_CONVERT {
+    tag "${meta.id}"
+
+    label 'process_medium'
+
+    container "fmalmeida/anndatar:dev" // TODO: Fix
+
+    input:
+    tuple val(meta), path(h5ad)
+
+    output:
+    tuple val(meta), path("${meta.id}_standardized.Rds"), emit: rds
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    template 'anndatar_convert.R'
+
+    stub:
+    """
+    touch ${meta.id}_standardized.Rds
+    """
+}
diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf
index cd08cbbe..41310553 100644
--- a/modules/local/concat_h5ad.nf
+++ b/modules/local/concat_h5ad.nf
@@ -1,13 +1,13 @@
 process CONCAT_H5AD {
+    tag "${meta.id}"
+
     label 'process_medium'
 
-    conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg"
-    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
-        'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' :
-        'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }"
+    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
+    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
 
     input:
-    tuple val(input_type), path(h5ad)
+    tuple val(meta), path(h5ad)
     path samplesheet
 
     output:
@@ -17,12 +17,7 @@ process CONCAT_H5AD {
     task.ext.when == null || task.ext.when
 
     script:
-    """
-    concat_h5ad.py \\
-        --input $samplesheet \\
-        --out combined_${input_type}_matrix.h5ad \\
-        --suffix "_matrix.h5ad"
-    """
+    template 'concat_h5ad.py'
 
     stub:
     """
diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf
new file mode 100644
index 00000000..84474ed0
--- /dev/null
+++ b/modules/local/mtx_to_h5ad_star.nf
@@ -0,0 +1,45 @@
+// Converts a STAR count matrix into an h5ad file using the
+// 'mtx_to_h5ad_star.py' template and emits it together with a meta map
+// extended by the detected matrix type ('raw' or 'filtered').
+process MTX_TO_H5AD_STAR {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg"
+    container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538"
+
+    input:
+    tuple val(meta), path(inputs)
+    path star_index
+
+    output:
+    // meta2 is assigned inside the script/stub sections, so both must define it
+    tuple val(meta2), path("${meta.id}/*h5ad"), emit: h5ad
+    path  "versions.yml"                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // Get a file to check input type. Some aligners bring arrays instead of a single file.
+    def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
+
+    // check input type of inputs
+    input_type   = (input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
+    meta2        = meta + [input_type: input_type]
+
+    template 'mtx_to_h5ad_star.py'
+
+    stub:
+    // Same input-type detection as in the script section: the h5ad output
+    // channel emits val(meta2), which would otherwise be undefined in stub runs.
+    def input_to_check = (inputs instanceof String) ? inputs : inputs[0]
+    input_type   = (input_to_check.toUriString().contains('raw')) ? 'raw' : 'filtered'
+    meta2        = meta + [input_type: input_type]
+
+    """
+    mkdir ${meta.id}
+    touch ${meta.id}/${meta.id}_matrix.h5ad
+    touch versions.yml
+    """
+}
diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R
new file mode 100755
index 00000000..479ac912
--- /dev/null
+++ b/modules/local/templates/anndatar_convert.R
@@ -0,0 +1,15 @@
+#!/usr/bin/env Rscript
+
+# to use nf variables: "${meta.id}"
+
+# load libraries
+library(anndataR)
+
+# read input
+adata <- read_h5ad("${h5ad}")
+
+# convert to Rds
+obj <- adata\$to_Seurat()
+
+# save files
+saveRDS(obj, file = "${meta.id}_standardized.Rds")
diff --git a/modules/local/templates/barcodes.py b/modules/local/templates/barcodes.py
new file mode 100644
index 00000000..8a9b10a7
--- /dev/null
+++ b/modules/local/templates/barcodes.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+
+import platform
+import anndata as ad
+import pandas as pd
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+df = pd.read_csv("${barcodes_csv}", header=None)
+adata = ad.read_h5ad("${h5ad}")
+
+adata = adata[df[0].values]
+
+adata.write_h5ad("${prefix}.h5ad")
+
+# Versions
+
+versions = {
+    "${task.process}": {
+        "python": platform.python_version(),
+        "anndata": ad.__version__,
+        "pandas": pd.__version__
+    }
+}
+
+with open("versions.yml", "w") as f:
+    f.write(format_yaml_like(versions))
diff --git a/bin/concat_h5ad.py b/modules/local/templates/concat_h5ad.py
similarity index 53%
rename from bin/concat_h5ad.py
rename to modules/local/templates/concat_h5ad.py
index 43ea071a..033bc89a 100755
--- a/bin/concat_h5ad.py
+++ b/modules/local/templates/concat_h5ad.py
@@ -7,7 +7,6 @@
 
 import scanpy as sc, anndata as ad, pandas as pd
 from pathlib import Path
-import argparse
 
 
 def read_samplesheet(samplesheet):
@@ -17,36 +16,24 @@ def read_samplesheet(samplesheet):
     # samplesheet may contain replicates, when it has,
     # group information from replicates and collapse with commas
     # only keep unique values using set()
-    df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column)))
+    df = df.groupby(["sample"]).agg(lambda column: ",".join(sorted(set(map(str, column)))))  # stringify each value, not the Series repr
 
     return df
 
 
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet")
-
-    parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv")
-    parser.add_argument("-o", "--out", dest="out", help="Output path.")
-    parser.add_argument(
-        "-s",
-        "--suffix",
-        dest="suffix",
-        help="Suffix of matrices to remove and get sample name",
-    )
-
-    args = vars(parser.parse_args())
 
     # Open samplesheet as dataframe
-    df_samplesheet = read_samplesheet(args["input"])
+    df_samplesheet = read_samplesheet("${samplesheet}")
 
     # find all h5ad and append to dict
-    dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")}
+    dict_of_h5ad = {str(path).replace("_${meta.input_type}_matrix.h5ad", "").replace("_matrix.h5ad", ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")}  # keys must equal the samplesheet sample ids
 
     # concat h5ad files
     adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_")
 
     # merge with data.frame, on sample information
-    adata.obs = adata.obs.join(df_samplesheet, on="sample")
-    adata.write_h5ad(args["out"], compression="gzip")
+    adata.obs = adata.obs.join(df_samplesheet, on="sample").astype(str)
+    adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad", compression="gzip")
 
-    print("Wrote h5ad file to {}".format(args["out"]))
+    print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad"))
diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py
new file mode 100755
index 00000000..48749114
--- /dev/null
+++ b/modules/local/templates/mtx_to_h5ad_star.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+
+# Set numba cache dir to current working directory (which is a writable mount also in containers)
+import os
+
+os.environ["NUMBA_CACHE_DIR"] = "."
+
+import scanpy as sc
+import pandas as pd
+import argparse
+from anndata import AnnData
+import platform
+
+def _mtx_to_adata(
+    input: str,
+    sample: str,
+):
+    adata = sc.read_10x_mtx(input)
+    adata.obs["sample"] = sample
+
+    return adata
+
+
+def format_yaml_like(data: dict, indent: int = 0) -> str:
+    """Formats a dictionary to a YAML-like string.
+    Args:
+        data (dict): The dictionary to format.
+        indent (int): The current indentation level.
+    Returns:
+        str: A string formatted as YAML.
+    """
+    yaml_str = ""
+    for key, value in data.items():
+        spaces = "  " * indent
+        if isinstance(value, dict):
+            yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}"
+        else:
+            yaml_str += f"{spaces}{key}: {value}\\n"
+    return yaml_str
+
+def dump_versions():
+    versions = {
+        "${task.process}": {
+            "python": platform.python_version(),
+            "scanpy": sc.__version__,
+            "pandas": pd.__version__
+        }
+    }
+
+    with open("versions.yml", "w") as f:
+        f.write(format_yaml_like(versions))
+
+def input_to_adata(
+    input_data: str,
+    output: str,
+    sample: str,
+):
+    print(f"Reading in {input_data}")
+
+    # open main data
+    adata = _mtx_to_adata(input_data, sample)
+
+    # standard format
+    # index are gene IDs and symbols are a column
+    adata.var["gene_symbol"] = adata.var.index
+    adata.var['gene_versions'] = adata.var["gene_ids"]
+    adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0]
+    adata.var.index = adata.var["gene_ids"].values
+    adata.var = adata.var.drop("gene_ids", axis=1)
+
+    # write results
+    adata.write_h5ad(f"{output}", compression="gzip")
+    print(f"Wrote h5ad file to {output}")
+
+    # dump versions
+    dump_versions()
+
+    return adata
+
+#
+# Run main script
+#
+
+# create the directory with the sample name
+os.makedirs("${meta.id}", exist_ok=True)
+
+# input_type comes from NF module
+adata = input_to_adata(
+    input_data="${input_type}",
+    output="${meta.id}/${meta.id}_${input_type}_matrix.h5ad",
+    sample="${meta.id}"
+)
diff --git a/modules/nf-core/cellbender/removebackground/environment.yml b/modules/nf-core/cellbender/removebackground/environment.yml
new file mode 100644
index 00000000..a157c522
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/environment.yml
@@ -0,0 +1,5 @@
+channels:
+  - conda-forge
+  - bioconda
+dependencies:
+  - bioconda::cellbender=0.3.0
diff --git a/modules/nf-core/cellbender/removebackground/main.nf b/modules/nf-core/cellbender/removebackground/main.nf
new file mode 100644
index 00000000..f3cfd1ff
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/main.nf
@@ -0,0 +1,65 @@
+process CELLBENDER_REMOVEBACKGROUND {
+    tag "$meta.id"
+    label 'process_medium'
+    label 'process_gpu'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'oras://community.wave.seqera.io/library/cellbender:0.3.0--c4addb97ab2d83fe':
+        'community.wave.seqera.io/library/cellbender:0.3.0--41318a055fc3aacb' }"
+
+    input:
+    tuple val(meta), path(h5ad)
+
+    output:
+    tuple val(meta), path("${prefix}.h5")               , emit: h5
+    tuple val(meta), path("${prefix}_filtered.h5")      , emit: filtered_h5
+    tuple val(meta), path("${prefix}_posterior.h5")     , emit: posterior_h5
+    tuple val(meta), path("${prefix}_cell_barcodes.csv"), emit: barcodes
+    tuple val(meta), path("${prefix}_metrics.csv")      , emit: metrics
+    tuple val(meta), path("${prefix}_report.html")      , emit: report
+    tuple val(meta), path("${prefix}.pdf")              , emit: pdf
+    tuple val(meta), path("${prefix}.log")              , emit: log
+    tuple val(meta), path("ckpt.tar.gz")                , emit: checkpoint
+    path "versions.yml"                                 , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    args = task.ext.args ?: ""
+    use_gpu = task.ext.use_gpu ? "--cuda" : ""
+    """
+    TMPDIR=. cellbender remove-background \
+        ${args} \
+        --cpu-threads ${task.cpus} \
+        ${use_gpu} \
+        --input ${h5ad} \
+        --output ${prefix}.h5
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cellbender: \$(cellbender --version)
+    END_VERSIONS
+    """
+
+    stub:
+    prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch "${prefix}.h5"
+    touch "${prefix}_filtered.h5"
+    touch "${prefix}_posterior.h5"
+    touch "${prefix}_cell_barcodes.csv"
+    touch "${prefix}_metrics.csv"
+    touch "${prefix}_report.html"
+    touch "${prefix}.pdf"
+    touch "${prefix}.log"
+    touch "ckpt.tar.gz"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        cellbender: \$(cellbender --version)
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/cellbender/removebackground/meta.yml b/modules/nf-core/cellbender/removebackground/meta.yml
new file mode 100644
index 00000000..d70fa3fd
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/meta.yml
@@ -0,0 +1,75 @@
+name: cellbender_removebackground
+description: Module to use CellBender to estimate ambient RNA from single-cell RNA-seq data
+keywords:
+  - single-cell
+  - scRNA-seq
+  - ambient RNA removal
+tools:
+  - cellbender:
+      description: CellBender is a software package for eliminating technical artifacts from high-throughput single-cell RNA sequencing (scRNA-seq) data.
+      documentation: https://cellbender.readthedocs.io/en/latest/
+      tool_dev_url: https://github.com/broadinstitute/CellBender
+      licence: ["BSD-3-Clause"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test' ]
+  - h5ad:
+      type: file
+      description: AnnData file containing unfiltered data (with empty droplets)
+      pattern: "*.h5ad"
+output:
+  - h5:
+      type: file
+      description: Full count matrix as an h5 file, with background RNA removed. This file contains all the original droplet barcodes.
+      pattern: "*.h5"
+  - filtered_h5:
+      type: file
+      description: |
+        Full count matrix as an h5 file, with background RNA removed. This file contains only the droplet barcodes which were determined to have a > 50% posterior probability of containing cells.
+      pattern: "*.h5"
+  - posterior_h5:
+      type: file
+      description: |
+        The full posterior probability of noise counts. This is not normally used downstream.
+      pattern: "*.h5"
+  - barcodes:
+      type: file
+      description: |
+        CSV file containing all the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. |
+        Barcodes are written in plain text. This information is also contained in each of the above outputs, |
+        but is included as a separate output for convenient use in certain downstream applications.
+      pattern: "*.csv"
+  - metrics:
+      type: file
+      description: |
+        Metrics describing the run, potentially to be used to flag problematic runs |
+        when using CellBender as part of a large-scale automated pipeline.
+      pattern: "*.csv"
+  - report:
+      type: file
+      description: |
+        HTML report including plots and commentary, along with any warnings or suggestions for improved parameter settings.
+      pattern: "*.html"
+  - pdf:
+      type: file
+      description: PDF file that provides a standard graphical summary of the inference procedure.
+      pattern: "*.pdf"
+  - log:
+      type: file
+      description: Log file produced by the cellbender remove-background run.
+      pattern: "*.log"
+  - checkpoint:
+      type: file
+      description: Checkpoint file which contains the trained model and the full posterior.
+      pattern: "*.ckpt"
+  - versions:
+      type: file
+      description: File containing software version
+      pattern: "versions.yml"
+authors:
+  - "@nictru"
+maintainers:
+  - "@nictru"
diff --git a/modules/nf-core/cellbender/removebackground/tests/epochs.config b/modules/nf-core/cellbender/removebackground/tests/epochs.config
new file mode 100644
index 00000000..96282b07
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/epochs.config
@@ -0,0 +1,6 @@
+
+process {
+    withName: CELLBENDER_REMOVEBACKGROUND {
+        ext.args = '--epochs 20'
+    }
+}
diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test b/modules/nf-core/cellbender/removebackground/tests/main.nf.test
new file mode 100644
index 00000000..1afa6f3b
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test
@@ -0,0 +1,66 @@
+nextflow_process {
+    name 'Test Process CELLBENDER_REMOVEBACKGROUND'
+    script '../main.nf'
+    process 'CELLBENDER_REMOVEBACKGROUND'
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "cellbender/removebackground"
+    tag "cellbender"
+
+    test("test_cellbender_removebackground") {
+        config './epochs.config'
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true)
+                ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                {assert process.success},
+                {assert file(process.out.h5.get(0).get(1)).exists()},
+                {assert file(process.out.filtered_h5.get(0).get(1)).exists()},
+                {assert file(process.out.posterior_h5.get(0).get(1)).exists()},
+                {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes")},
+                {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics")},
+                {assert file(process.out.report.get(0).get(1)).exists()},
+                {assert file(process.out.pdf.get(0).get(1)).exists()},
+                {assert file(process.out.log.get(0).get(1)).exists()},
+                {assert snapshot(process.out.versions).match("cellbender_removebackground_versions")}
+            )
+        }
+    }
+
+    test("test_cellbender_removebackground - stub") {
+        options '-stub'
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ], // meta map
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true)
+                ]
+                """
+            }
+        }
+        then {
+            assertAll(
+                {assert process.success},
+                {assert snapshot(process.out.h5).match("cellbender_removebackground_h5_stub")},
+                {assert snapshot(process.out.filtered_h5).match("cellbender_removebackground_filtered_h5_stub")},
+                {assert snapshot(process.out.posterior_h5).match("cellbender_removebackground_posterior_h5_stub")},
+                {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes_stub")},
+                {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics_stub")},
+                {assert snapshot(process.out.report).match("cellbender_removebackground_report_stub")},
+                {assert snapshot(process.out.pdf).match("cellbender_removebackground_pdf_stub")},
+                {assert snapshot(process.out.log).match("cellbender_removebackground_log_stub")},
+                {assert snapshot(process.out.versions).match("cellbender_removebackground_versions_stub")}
+            )
+        }
+    }
+}
diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap
new file mode 100644
index 00000000..fdb51d66
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap
@@ -0,0 +1,196 @@
+{
+    "cellbender_removebackground_versions": {
+        "content": [
+            [
+                "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:09.33127881"
+    },
+    "cellbender_removebackground_filtered_h5_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_filtered.h5:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.833598082"
+    },
+    "cellbender_removebackground_pdf_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.pdf:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.891829278"
+    },
+    "cellbender_removebackground_metrics": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_metrics.csv:md5,88272bde1c157528b0b0ab2abe5ad26f"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:09.327155805"
+    },
+    "cellbender_removebackground_versions_stub": {
+        "content": [
+            [
+                "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a"
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.904614838"
+    },
+    "cellbender_removebackground_h5_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.829304361"
+    },
+    "cellbender_removebackground_metrics_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_metrics.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.870469733"
+    },
+    "cellbender_removebackground_log_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.899293304"
+    },
+    "cellbender_removebackground_barcodes": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_cell_barcodes.csv:md5,c8e8df9d0f9aea976d6f6aa36d329429"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:09.316098811"
+    },
+    "cellbender_removebackground_report_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_report.html:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.885307244"
+    },
+    "cellbender_removebackground_posterior_h5_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_posterior.h5:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.838032754"
+    },
+    "cellbender_removebackground_barcodes_stub": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test_cell_barcodes.csv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.9.0",
+            "nextflow": "24.04.3"
+        },
+        "timestamp": "2024-08-12T13:41:20.861284979"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/cellbender/removebackground/tests/tags.yml b/modules/nf-core/cellbender/removebackground/tests/tags.yml
new file mode 100644
index 00000000..d935083b
--- /dev/null
+++ b/modules/nf-core/cellbender/removebackground/tests/tags.yml
@@ -0,0 +1,2 @@
+cellbender/removebackground:
+  - modules/nf-core/cellbender/removebackground/**
diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf
new file mode 100644
index 00000000..202612d6
--- /dev/null
+++ b/subworkflows/local/emptydrops_removal.nf
@@ -0,0 +1,25 @@
+include { CELLBENDER_REMOVEBACKGROUND } from '../../modules/nf-core/cellbender/removebackground'
+include { ADATA_BARCODES              } from '../../modules/local/adata_barcodes'
+
+workflow EMPTY_DROPLET_REMOVAL {
+    take:
+    ch_unfiltered
+
+    main:
+    ch_versions = Channel.empty()
+
+    CELLBENDER_REMOVEBACKGROUND(ch_unfiltered)
+    ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions)
+
+    ch_combined = ch_unfiltered.join(CELLBENDER_REMOVEBACKGROUND.out.barcodes)
+
+    ADATA_BARCODES(ch_combined)
+    ch_versions = ch_versions.mix(ADATA_BARCODES.out.versions)
+
+    ch_h5ad = ADATA_BARCODES.out.h5ad
+
+    emit:
+    h5ad = ch_h5ad
+
+    versions = ch_versions
+}
diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf
index 98e49a2e..6435966d 100644
--- a/subworkflows/local/mtx_conversion.nf
+++ b/subworkflows/local/mtx_conversion.nf
@@ -1,62 +1,41 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { MTX_TO_H5AD   }             from '../../modules/local/mtx_to_h5ad.nf'
-include { CONCAT_H5AD   }             from '../../modules/local/concat_h5ad.nf'
-include { MTX_TO_SEURAT }             from '../../modules/local/mtx_to_seurat.nf'
+include { CONCAT_H5AD           } from '../../modules/local/concat_h5ad.nf'
+include { ANNDATAR_CONVERT      } from '../../modules/local/anndatar_convert'
 
 workflow MTX_CONVERSION {
 
     take:
     mtx_matrices
     samplesheet
-    txp2gene
-    star_index
 
     main:
         ch_versions = Channel.empty()
 
-        // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need.
-        // Keeping backwards compatibility with cellranger-arc.
-        // TODO: Adapt cellranger-arc subworkflow like cellranger to remove this snippet here.
-        if (params.aligner in [ 'cellrangerarc' ]) {
-            mtx_matrices = mtx_matrices.map { meta, mtx_files ->
-                    [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ]
-                }
-                .filter { meta, mtx_files -> mtx_files } // Remove any that are missing the relevant files
-        }
-
         //
-        // Convert matrix to h5ad
+        // MODULE: Convert to Rds with AnndataR package
         //
-        MTX_TO_H5AD (
-            mtx_matrices,
-            txp2gene,
-            star_index
+        ANNDATAR_CONVERT (
+            mtx_matrices
         )
 
         //
         // Concat sample-specific h5ad in one
         //
-        ch_concat_h5ad_input = MTX_TO_H5AD.out.h5ad.groupTuple() // gather all sample-specific files / per type
+        ch_concat_h5ad_input = mtx_matrices.groupTuple() // gather all sample-specific files / per type
         if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') {
             // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] )
             // which nextflow break because it is not a valid 'path' thus, we have to remove one level
             // making it as [ mtx_1, mtx_2 ]
             ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] }
         }
+
         CONCAT_H5AD (
             ch_concat_h5ad_input,
             samplesheet
         )
 
-        //
-        // Convert matrix do seurat
-        //
-        MTX_TO_SEURAT (
-            mtx_matrices
-        )
-
         //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output
-        ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions)
+        // ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions)
 
     emit:
     ch_versions
diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf
index 0c11acd1..8236632d 100644
--- a/subworkflows/local/starsolo.nf
+++ b/subworkflows/local/starsolo.nf
@@ -1,5 +1,6 @@
 /* --    IMPORT LOCAL MODULES/SUBWORKFLOWS     -- */
-include { STAR_ALIGN }                  from '../../modules/local/star_align'
+include { STAR_ALIGN }                      from '../../modules/local/star_align'
+include { MTX_TO_H5AD_STAR as MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad_star'
 
 /* --    IMPORT NF-CORE MODULES/SUBWORKFLOWS   -- */
 include { GUNZIP }                      from '../../modules/nf-core/gunzip/main'
@@ -53,14 +54,21 @@ workflow STARSOLO {
     )
     ch_versions = ch_versions.mix(STAR_ALIGN.out.versions)
 
+    /*
+    * Perform h5ad conversion
+    */
+    MTX_TO_H5AD (
+        STAR_ALIGN.out.raw_counts.mix( STAR_ALIGN.out.filtered_counts ),
+        star_index.map{ meta, index -> index }
+    )
+    ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first())
+
 
     emit:
     ch_versions
     // get rid of meta for star index
-    star_index  = star_index.map{ meta, index -> index }
     star_result = STAR_ALIGN.out.tab
     star_counts = STAR_ALIGN.out.counts
-    raw_counts = STAR_ALIGN.out.raw_counts
-    filtered_counts = STAR_ALIGN.out.filtered_counts
+    star_h5ad   = MTX_TO_H5AD.out.h5ad
     for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it }
 }
diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf
index 10ced221..1e7ea9ef 100644
--- a/workflows/scrnaseq.nf
+++ b/workflows/scrnaseq.nf
@@ -3,13 +3,13 @@ include { FASTQC_CHECK                       } from '../subworkflows/local/fastq
 include { KALLISTO_BUSTOOLS                  } from '../subworkflows/local/kallisto_bustools'
 include { SCRNASEQ_ALEVIN                    } from '../subworkflows/local/alevin'
 include { STARSOLO                           } from '../subworkflows/local/starsolo'
-include { CELLRANGER_ALIGN                   } from "../subworkflows/local/align_cellranger"
-include { CELLRANGER_MULTI_ALIGN             } from "../subworkflows/local/align_cellrangermulti"
-include { CELLRANGERARC_ALIGN                } from "../subworkflows/local/align_cellrangerarc"
-include { UNIVERSC_ALIGN                     } from "../subworkflows/local/align_universc"
-include { MTX_CONVERSION                     } from "../subworkflows/local/mtx_conversion"
+include { CELLRANGER_ALIGN                   } from '../subworkflows/local/align_cellranger'
+include { CELLRANGER_MULTI_ALIGN             } from '../subworkflows/local/align_cellrangermulti'
+include { CELLRANGERARC_ALIGN                } from '../subworkflows/local/align_cellrangerarc'
+include { UNIVERSC_ALIGN                     } from '../subworkflows/local/align_universc'
+include { EMPTY_DROPLET_REMOVAL              } from '../subworkflows/local/emptydrops_removal'
+include { MTX_CONVERSION                     } from '../subworkflows/local/mtx_conversion'
 include { GTF_GENE_FILTER                    } from '../modules/local/gtf_gene_filter'
-include { EMPTYDROPS_CELL_CALLING            } from '../modules/local/emptydrops'
 include { GUNZIP as GUNZIP_FASTA             } from '../modules/nf-core/gunzip/main'
 include { GUNZIP as GUNZIP_GTF               } from '../modules/nf-core/gunzip/main'
 include { paramsSummaryMultiqc               } from '../subworkflows/nf-core/utils_nfcore_pipeline'
@@ -86,8 +86,8 @@ workflow SCRNASEQ {
     ch_multi_samplesheet              = params.cellranger_multi_barcodes ? file(params.cellranger_multi_barcodes, checkIfExists: true) : []
     empty_file                        = file("$projectDir/assets/EMPTY", checkIfExists: true)
 
-    ch_versions     = Channel.empty()
-    ch_mtx_matrices = Channel.empty()
+    ch_versions      = Channel.empty()
+    ch_h5ad_matrices = Channel.empty()
 
     // Run FastQC
     if (!params.skip_fastqc) {
@@ -137,7 +137,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts)
         ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene
     }
 
@@ -155,7 +155,7 @@ workflow SCRNASEQ {
         )
         ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions)
         ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it })
-        ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results)
     }
 
     // Run STARSolo pipeline
@@ -171,9 +171,8 @@ workflow SCRNASEQ {
             protocol_config.get('extra_args', ""),
         )
         ch_versions = ch_versions.mix(STARSOLO.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts)
-        ch_star_index = STARSOLO.out.star_index
         ch_multiqc_files = ch_multiqc_files.mix(STARSOLO.out.for_multiqc)
+        ch_h5ad_matrices = STARSOLO.out.star_h5ad
     }
 
     // Run cellranger pipeline
@@ -186,7 +185,6 @@ workflow SCRNASEQ {
             protocol_config['protocol']
         )
         ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices)
         ch_star_index = CELLRANGER_ALIGN.out.star_index
         ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"}
@@ -203,7 +201,7 @@ workflow SCRNASEQ {
             ch_fastq
         )
         ch_versions = ch_versions.mix(UNIVERSC_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(UNIVERSC_ALIGN.out.universc_out)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(UNIVERSC_ALIGN.out.universc_out)
     }
 
     // Run cellrangerarc pipeline
@@ -217,7 +215,7 @@ workflow SCRNASEQ {
             ch_cellrangerarc_config
         )
         ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions)
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out)
     }
 
     // Run cellrangermulti pipeline
@@ -285,38 +283,34 @@ workflow SCRNASEQ {
         ch_multiqc_files = ch_multiqc_files.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_out.map{
             meta, outs -> outs.findAll{ it -> it.name == "web_summary.html" }
         })
-        ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx)
+        ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx)
 
     }
 
-    // Run emptydrops calling module
+    // SUBWORKFLOW: Run cellbender emptydrops filter
     if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) {
 
         //
         // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it
         //
         if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) {
-            ch_mtx_matrices_for_emptydrops =
-                ch_mtx_matrices.filter { meta, mtx_files ->
-                    mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger
-                    mtx_files.toString().contains("counts_unfiltered")     || // kallisto
-                    mtx_files.toString().contains("raw")                      // star
-                }
+            ch_h5ad_matrices_for_emptydrops =
+                ch_h5ad_matrices.filter { meta, mtx_files -> meta.input_type == 'raw' }
         } else {
-            ch_mtx_matrices_for_emptydrops = ch_mtx_matrices
+            ch_h5ad_matrices_for_emptydrops = ch_h5ad_matrices
         }
 
-        EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops )
-        ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )
+        EMPTY_DROPLET_REMOVAL (
+            ch_h5ad_matrices_for_emptydrops
+        )
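+        // NOTE(review): disabled because EMPTYDROPS_CELL_CALLING is no longer included; confirm which EMPTY_DROPLET_REMOVAL output should be mixed in here. Was: ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices )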
 
     }
 
     // Run mtx to h5ad conversion subworkflow
     MTX_CONVERSION (
-        ch_mtx_matrices,
-        ch_input,
-        ch_txp2gene,
-        ch_star_index
+        ch_h5ad_matrices,
+        ch_input
     )
 
     //Add Versions from MTX Conversion workflow too