diff --git a/conf/modules.config b/conf/modules.config index 81395a1d..8a53c285 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -32,25 +32,28 @@ process { } if (!params.skip_emptydrops) { - withName: EMPTYDROPS_CELL_CALLING { + withName: 'CELLBENDER_REMOVEBACKGROUND' { publishDir = [ - path: { "${params.outdir}/${params.aligner}" }, - mode: params.publish_dir_mode, - saveAs: { filename -> - if ( params.aligner == 'cellranger' ) "count/${meta.id}/${filename}" - else if ( params.aligner == 'kallisto' ) "${meta.id}.count/${filename}" - else "${meta.id}/${filename}" - } + path: { "${params.outdir}/${params.aligner}/${meta.id}/emptydrops_filter" }, + mode: params.publish_dir_mode + ] + } + withName: 'ADATA_BARCODES' { + ext.prefix = { "${meta.id}_custom_emptydrops_filter_matrix" } + publishDir = [ + path: { "${params.outdir}/${params.aligner}/mtx_conversions/${meta.id}" }, + mode: params.publish_dir_mode ] } } - withName: 'MTX_TO_H5AD|CONCAT_H5AD|MTX_TO_SEURAT' { + withName: 'MTX_TO_H5AD*|CONCAT_H5AD|ANNDATAR_CONVERT' { publishDir = [ path: { "${params.outdir}/${params.aligner}/mtx_conversions" }, mode: params.publish_dir_mode ] } + withName: 'GTF_GENE_FILTER' { publishDir = [ path: { "${params.outdir}/gtf_filter" }, diff --git a/modules.json b/modules.json index aa186d98..b9680cfe 100644 --- a/modules.json +++ b/modules.json @@ -5,6 +5,11 @@ "https://github.com/nf-core/modules.git": { "modules": { "nf-core": { + "cellbender/removebackground": { + "branch": "master", + "git_sha": "06c8865e36741e05ad32ef70ab3fac127486af48", + "installed_by": ["modules"] + }, "cellranger/count": { "branch": "master", "git_sha": "90dad5491658049282ceb287a3d7732c1ce39837", diff --git a/modules/local/adata_barcodes.nf b/modules/local/adata_barcodes.nf new file mode 100644 index 00000000..2aef8ec9 --- /dev/null +++ b/modules/local/adata_barcodes.nf @@ -0,0 +1,23 @@ +process ADATA_BARCODES { + tag "$meta.id" + label 'process_single' + + conda "${moduleDir}/environment.yml" + 
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/anndata:0.10.7--e9840a94592528c8': + 'community.wave.seqera.io/library/anndata:0.10.7--336c6c1921a0632b' }" + + input: + tuple val(meta), path(h5ad), path(barcodes_csv) + + output: + tuple val(meta), path("*.h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + template 'barcodes.py' +} diff --git a/modules/local/anndatar_convert.nf b/modules/local/anndatar_convert.nf new file mode 100644 index 00000000..dfe5fce9 --- /dev/null +++ b/modules/local/anndatar_convert.nf @@ -0,0 +1,24 @@ +process ANNDATAR_CONVERT { + tag "${meta.id}" + + label 'process_medium' + + container "fmalmeida/anndatar:dev" // TODO: Fix + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("${meta.id}_standardized.Rds"), emit: rds + + when: + task.ext.when == null || task.ext.when + + script: + template 'anndatar_convert.R' + + stub: + """ + touch ${meta.id}_standardized.Rds + """ +} diff --git a/modules/local/concat_h5ad.nf b/modules/local/concat_h5ad.nf index cd08cbbe..41310553 100644 --- a/modules/local/concat_h5ad.nf +++ b/modules/local/concat_h5ad.nf @@ -1,13 +1,13 @@ process CONCAT_H5AD { + tag "${meta.id}" + label 'process_medium' - conda "conda-forge::scanpy conda-forge::python-igraph conda-forge::leidenalg" - container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/scanpy:1.7.2--pyhdfd78af_0' : - 'biocontainers/scanpy:1.7.2--pyhdfd78af_0' }" + conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg" + container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538" input: - tuple val(input_type), path(h5ad) + tuple val(meta), path(h5ad) path samplesheet output: @@ -17,12 +17,7 @@ process CONCAT_H5AD { task.ext.when == null || task.ext.when script: - """ - concat_h5ad.py \\ - --input $samplesheet \\ - --out combined_${input_type}_matrix.h5ad \\ - --suffix "_matrix.h5ad" - """ + template 'concat_h5ad.py' stub: """ diff --git a/modules/local/mtx_to_h5ad_star.nf b/modules/local/mtx_to_h5ad_star.nf new file mode 100644 index 00000000..84474ed0 --- /dev/null +++ b/modules/local/mtx_to_h5ad_star.nf @@ -0,0 +1,35 @@ +process MTX_TO_H5AD_STAR { + tag "$meta.id" + label 'process_medium' + + conda "conda-forge::scanpy==1.10.2 conda-forge::python-igraph conda-forge::leidenalg" + container "community.wave.seqera.io/library/scanpy:1.10.2--e83da2205b92a538" + + input: + tuple val(meta), path(inputs) + path star_index + + output: + tuple val(meta2), path("${meta.id}/*h5ad"), emit: h5ad + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Get a file to check input type. Some aligners bring arrays instead of a single file. + def input_to_check = (inputs instanceof String) ? inputs : inputs[0] + + // check input type of inputs + input_type = (input_to_check.toUriString().contains('raw')) ? 
'raw' : 'filtered' + meta2 = meta + [input_type: input_type] + + template 'mtx_to_h5ad_star.py' + + stub: + """ + mkdir ${meta.id} + touch ${meta.id}/${meta.id}_matrix.h5ad + touch versions.yml + """ +} diff --git a/modules/local/templates/anndatar_convert.R b/modules/local/templates/anndatar_convert.R new file mode 100755 index 00000000..479ac912 --- /dev/null +++ b/modules/local/templates/anndatar_convert.R @@ -0,0 +1,15 @@ +#!/usr/bin/env Rscript + +# to use nf variables: "${meta.id}" + +# load libraries +library(anndataR) + +# read input +adata <- read_h5ad("${h5ad}") + +# convert to Rds +obj <- adata\$to_Seurat() + +# save files +saveRDS(obj, file = "${meta.id}_standardized.Rds") diff --git a/modules/local/templates/barcodes.py b/modules/local/templates/barcodes.py new file mode 100644 index 00000000..8a9b10a7 --- /dev/null +++ b/modules/local/templates/barcodes.py @@ -0,0 +1,44 @@ +#!/usr/bin/env python3 + +import platform +import anndata as ad +import pandas as pd + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +df = pd.read_csv("${barcodes_csv}", header=None) +adata = ad.read_h5ad("${h5ad}") + +adata = adata[df[0].values] + +adata.write_h5ad("${prefix}.h5ad") + +# Versions + +versions = { + "${task.process}": { + "python": platform.python_version(), + "anndata": ad.__version__, + "pandas": pd.__version__ + } +} + +with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) diff --git a/bin/concat_h5ad.py b/modules/local/templates/concat_h5ad.py similarity index 53% rename from bin/concat_h5ad.py rename to modules/local/templates/concat_h5ad.py index 43ea071a..033bc89a 100755 --- a/bin/concat_h5ad.py +++ b/modules/local/templates/concat_h5ad.py @@ -7,7 +7,6 @@ import scanpy as sc, anndata as ad, pandas as pd from pathlib import Path -import argparse def read_samplesheet(samplesheet): @@ -17,36 +16,24 @@ def read_samplesheet(samplesheet): # samplesheet may contain replicates, when it has, # group information from replicates and collapse with commas # only keep unique values using set() - df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column))) + df = df.groupby(["sample"]).agg(lambda column: ",".join(set(column.astype(str)))) return df if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Concatenates h5ad files and merge metadata from samplesheet") - - parser.add_argument("-i", "--input", dest="input", help="Path to samplesheet.csv") - parser.add_argument("-o", "--out", dest="out", help="Output path.") - parser.add_argument( - "-s", - "--suffix", - dest="suffix", - help="Suffix of matrices to remove and get sample name", - ) - - args = vars(parser.parse_args()) # Open samplesheet as dataframe - df_samplesheet = 
read_samplesheet("${samplesheet}") # find all h5ad and append to dict - dict_of_h5ad = {str(path).replace(args["suffix"], ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} + dict_of_h5ad = {str(path).replace("_matrix.h5ad", ""): sc.read_h5ad(path) for path in Path(".").rglob("*.h5ad")} # concat h5ad files adata = ad.concat(dict_of_h5ad, label="sample", merge="unique", index_unique="_") # merge with data.frame, on sample information - adata.obs = adata.obs.join(df_samplesheet, on="sample") - adata.write_h5ad(args["out"], compression="gzip") + adata.obs = adata.obs.join(df_samplesheet, on="sample").astype(str) + adata.write_h5ad("combined_${meta.input_type}_matrix.h5ad", compression="gzip") - print("Wrote h5ad file to {}".format(args["out"])) + print("Wrote h5ad file to {}".format("combined_${meta.input_type}_matrix.h5ad")) diff --git a/modules/local/templates/mtx_to_h5ad_star.py b/modules/local/templates/mtx_to_h5ad_star.py new file mode 100755 index 00000000..48749114 --- /dev/null +++ b/modules/local/templates/mtx_to_h5ad_star.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python + +# Set numba cache dir to current working directory (which is a writable mount also in containers) +import os + +os.environ["NUMBA_CACHE_DIR"] = "." + +import scanpy as sc +import pandas as pd +import argparse +from anndata import AnnData +import platform + +def _mtx_to_adata( + input: str, + sample: str, +): + adata = sc.read_10x_mtx(input) + adata.obs["sample"] = sample + + return adata + + +def format_yaml_like(data: dict, indent: int = 0) -> str: + """Formats a dictionary to a YAML-like string. + Args: + data (dict): The dictionary to format. + indent (int): The current indentation level. + Returns: + str: A string formatted as YAML. 
+ """ + yaml_str = "" + for key, value in data.items(): + spaces = " " * indent + if isinstance(value, dict): + yaml_str += f"{spaces}{key}:\\n{format_yaml_like(value, indent + 1)}" + else: + yaml_str += f"{spaces}{key}: {value}\\n" + return yaml_str + +def dump_versions(): + versions = { + "${task.process}": { + "python": platform.python_version(), + "scanpy": sc.__version__, + "pandas": pd.__version__ + } + } + + with open("versions.yml", "w") as f: + f.write(format_yaml_like(versions)) + +def input_to_adata( + input_data: str, + output: str, + sample: str, +): + print(f"Reading in {input_data}") + + # open main data + adata = _mtx_to_adata(input_data, sample) + + # standard format + # index are gene IDs and symbols are a column + adata.var["gene_symbol"] = adata.var.index + adata.var['gene_versions'] = adata.var["gene_ids"] + adata.var['gene_ids'] = adata.var['gene_versions'].str.split('.').str[0] + adata.var.index = adata.var["gene_ids"].values + adata.var = adata.var.drop("gene_ids", axis=1) + + # write results + adata.write_h5ad(f"{output}", compression="gzip") + print(f"Wrote h5ad file to {output}") + + # dump versions + dump_versions() + + return adata + +# +# Run main script +# + +# create the directory with the sample name +os.makedirs("${meta.id}", exist_ok=True) + +# input_type comes from NF module +adata = input_to_adata( + input_data="${input_type}", + output="${meta.id}/${meta.id}_${input_type}_matrix.h5ad", + sample="${meta.id}" +) diff --git a/modules/nf-core/cellbender/removebackground/environment.yml b/modules/nf-core/cellbender/removebackground/environment.yml new file mode 100644 index 00000000..a157c522 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/environment.yml @@ -0,0 +1,5 @@ +channels: + - conda-forge + - bioconda +dependencies: + - bioconda::cellbender=0.3.0 diff --git a/modules/nf-core/cellbender/removebackground/main.nf b/modules/nf-core/cellbender/removebackground/main.nf new file mode 100644 index 00000000..f3cfd1ff 
--- /dev/null +++ b/modules/nf-core/cellbender/removebackground/main.nf @@ -0,0 +1,65 @@ +process CELLBENDER_REMOVEBACKGROUND { + tag "$meta.id" + label 'process_medium' + label 'process_gpu' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'oras://community.wave.seqera.io/library/cellbender:0.3.0--c4addb97ab2d83fe': + 'community.wave.seqera.io/library/cellbender:0.3.0--41318a055fc3aacb' }" + + input: + tuple val(meta), path(h5ad) + + output: + tuple val(meta), path("${prefix}.h5") , emit: h5 + tuple val(meta), path("${prefix}_filtered.h5") , emit: filtered_h5 + tuple val(meta), path("${prefix}_posterior.h5") , emit: posterior_h5 + tuple val(meta), path("${prefix}_cell_barcodes.csv"), emit: barcodes + tuple val(meta), path("${prefix}_metrics.csv") , emit: metrics + tuple val(meta), path("${prefix}_report.html") , emit: report + tuple val(meta), path("${prefix}.pdf") , emit: pdf + tuple val(meta), path("${prefix}.log") , emit: log + tuple val(meta), path("ckpt.tar.gz") , emit: checkpoint + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + prefix = task.ext.prefix ?: "${meta.id}" + args = task.ext.args ?: "" + use_gpu = task.ext.use_gpu ? "--cuda" : "" + """ + TMPDIR=. 
cellbender remove-background \ + ${args} \ + --cpu-threads ${task.cpus} \ + ${use_gpu} \ + --input ${h5ad} \ + --output ${prefix}.h5 + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellbender: \$(cellbender --version) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + touch "${prefix}.h5" + touch "${prefix}_filtered.h5" + touch "${prefix}_posterior.h5" + touch "${prefix}_cell_barcodes.csv" + touch "${prefix}_metrics.csv" + touch "${prefix}_report.html" + touch "${prefix}.pdf" + touch "${prefix}.log" + touch "ckpt.tar.gz" + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cellbender: \$(cellbender --version) + END_VERSIONS + """ +} diff --git a/modules/nf-core/cellbender/removebackground/meta.yml b/modules/nf-core/cellbender/removebackground/meta.yml new file mode 100644 index 00000000..d70fa3fd --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/meta.yml @@ -0,0 +1,75 @@ +name: cellbender_removebackground +description: Module to use CellBender to estimate ambient RNA from single-cell RNA-seq data +keywords: + - single-cell + - scRNA-seq + - ambient RNA removal +tools: + - cellbender: + description: CellBender is a software package for eliminating technical artifacts from high-throughput single-cell RNA sequencing (scRNA-seq) data. + documentation: https://cellbender.readthedocs.io/en/latest/ + tool_dev_url: https://github.com/broadinstitute/CellBender + licence: ["BSD-3-Clause"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test' ] + - h5ad: + type: file + description: AnnData file containing unfiltered data (with empty droplets) + pattern: "*.h5ad" +output: + - h5: + type: file + description: Full count matrix as an h5 file, with background RNA removed. This file contains all the original droplet barcodes. + pattern: "*.h5" + - filtered_h5: + type: file + description: | + Full count matrix as an h5 file, with background RNA removed. 
This file contains only the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. + pattern: "*.h5" + - posterior_h5: + type: file + description: | + The full posterior probability of noise counts. This is not normally used downstream. + pattern: "*.h5" + - barcodes: + type: file + description: | + CSV file containing all the droplet barcodes which were determined to have a > 50% posterior probability of containing cells. | + Barcodes are written in plain text. This information is also contained in each of the above outputs, | + but is included as a separate output for convenient use in certain downstream applications. + pattern: "*.csv" + - metrics: + type: file + description: | + Metrics describing the run, potentially to be used to flag problematic runs | + when using CellBender as part of a large-scale automated pipeline. + pattern: "*.csv" + - report: + type: file + description: | + HTML report including plots and commentary, along with any warnings or suggestions for improved parameter settings. + pattern: "*.html" + - pdf: + type: file + description: PDF file that provides a standard graphical summary of the inference procedure. + pattern: "*.pdf" + - log: + type: file + description: Log file produced by the cellbender remove-background run. + pattern: "*.log" + - checkpoint: + type: file + description: Checkpoint file which contains the trained model and the full posterior. 
+ pattern: "*.ckpt" + - versions: + type: file + description: File containing software version + pattern: "versions.yml" +authors: + - "@nictru" +maintainers: + - "@nictru" diff --git a/modules/nf-core/cellbender/removebackground/tests/epochs.config b/modules/nf-core/cellbender/removebackground/tests/epochs.config new file mode 100644 index 00000000..96282b07 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/epochs.config @@ -0,0 +1,6 @@ + +process { + withName: CELLBENDER_REMOVEBACKGROUND { + ext.args = '--epochs 20' + } +} diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test b/modules/nf-core/cellbender/removebackground/tests/main.nf.test new file mode 100644 index 00000000..1afa6f3b --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test @@ -0,0 +1,66 @@ +nextflow_process { + name 'Test Process CELLBENDER_REMOVEBACKGROUND' + script '../main.nf' + process 'CELLBENDER_REMOVEBACKGROUND' + + tag "modules" + tag "modules_nfcore" + tag "cellbender/removebackground" + tag "cellbender" + + test("test_cellbender_removebackground") { + config './epochs.config' + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + ] + """ + } + } + then { + assertAll( + {assert process.success}, + {assert file(process.out.h5.get(0).get(1)).exists()}, + {assert file(process.out.filtered_h5.get(0).get(1)).exists()}, + {assert file(process.out.posterior_h5.get(0).get(1)).exists()}, + {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes")}, + {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics")}, + {assert file(process.out.report.get(0).get(1)).exists()}, + {assert file(process.out.pdf.get(0).get(1)).exists()}, + {assert file(process.out.log.get(0).get(1)).exists()}, + {assert 
snapshot(process.out.versions).match("cellbender_removebackground_versions")} + ) + } + } + + test("test_cellbender_removebackground - stub") { + options '-stub' + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file("https://raw.githubusercontent.com/nf-core/test-datasets/scdownstream/samples/SAMN14430799_raw_matrix_5k.h5ad", checkIfExists: true) + ] + """ + } + } + then { + assertAll( + {assert process.success}, + {assert snapshot(process.out.h5).match("cellbender_removebackground_h5_stub")}, + {assert snapshot(process.out.filtered_h5).match("cellbender_removebackground_filtered_h5_stub")}, + {assert snapshot(process.out.posterior_h5).match("cellbender_removebackground_posterior_h5_stub")}, + {assert snapshot(process.out.barcodes).match("cellbender_removebackground_barcodes_stub")}, + {assert snapshot(process.out.metrics).match("cellbender_removebackground_metrics_stub")}, + {assert snapshot(process.out.report).match("cellbender_removebackground_report_stub")}, + {assert snapshot(process.out.pdf).match("cellbender_removebackground_pdf_stub")}, + {assert snapshot(process.out.log).match("cellbender_removebackground_log_stub")}, + {assert snapshot(process.out.versions).match("cellbender_removebackground_versions_stub")} + ) + } + } +} diff --git a/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap new file mode 100644 index 00000000..fdb51d66 --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "cellbender_removebackground_versions": { + "content": [ + [ + "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.33127881" + }, + "cellbender_removebackground_filtered_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_filtered.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + 
"meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.833598082" + }, + "cellbender_removebackground_pdf_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.pdf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.891829278" + }, + "cellbender_removebackground_metrics": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_metrics.csv:md5,88272bde1c157528b0b0ab2abe5ad26f" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.327155805" + }, + "cellbender_removebackground_versions_stub": { + "content": [ + [ + "versions.yml:md5,b236ac7595dfa6cd4d51ac73e51cb05a" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.904614838" + }, + "cellbender_removebackground_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.829304361" + }, + "cellbender_removebackground_metrics_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_metrics.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.870469733" + }, + "cellbender_removebackground_log_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.899293304" + }, + "cellbender_removebackground_barcodes": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_cell_barcodes.csv:md5,c8e8df9d0f9aea976d6f6aa36d329429" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:09.316098811" 
+ }, + "cellbender_removebackground_report_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_report.html:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.885307244" + }, + "cellbender_removebackground_posterior_h5_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_posterior.h5:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.838032754" + }, + "cellbender_removebackground_barcodes_stub": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_cell_barcodes.csv:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-08-12T13:41:20.861284979" + } +} \ No newline at end of file diff --git a/modules/nf-core/cellbender/removebackground/tests/tags.yml b/modules/nf-core/cellbender/removebackground/tests/tags.yml new file mode 100644 index 00000000..d935083b --- /dev/null +++ b/modules/nf-core/cellbender/removebackground/tests/tags.yml @@ -0,0 +1,2 @@ +cellbender/removebackground: + - modules/nf-core/cellbender/removebackground/** diff --git a/subworkflows/local/emptydrops_removal.nf b/subworkflows/local/emptydrops_removal.nf new file mode 100644 index 00000000..202612d6 --- /dev/null +++ b/subworkflows/local/emptydrops_removal.nf @@ -0,0 +1,25 @@ +include { CELLBENDER_REMOVEBACKGROUND } from '../../modules/nf-core/cellbender/removebackground' +include { ADATA_BARCODES } from '../../modules/local/adata_barcodes' + +workflow EMPTY_DROPLET_REMOVAL { + take: + ch_unfiltered + + main: + ch_versions = Channel.empty() + + CELLBENDER_REMOVEBACKGROUND(ch_unfiltered) + ch_versions = ch_versions.mix(CELLBENDER_REMOVEBACKGROUND.out.versions) + + ch_combined = ch_unfiltered.join(CELLBENDER_REMOVEBACKGROUND.out.barcodes) + + ADATA_BARCODES(ch_combined) + ch_versions = 
ch_versions.mix(ADATA_BARCODES.out.versions) + + ch_h5ad = ADATA_BARCODES.out.h5ad + + emit: + h5ad = ch_h5ad + + versions = ch_versions +} diff --git a/subworkflows/local/mtx_conversion.nf b/subworkflows/local/mtx_conversion.nf index 98e49a2e..6435966d 100644 --- a/subworkflows/local/mtx_conversion.nf +++ b/subworkflows/local/mtx_conversion.nf @@ -1,62 +1,41 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad.nf' -include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' -include { MTX_TO_SEURAT } from '../../modules/local/mtx_to_seurat.nf' +include { CONCAT_H5AD } from '../../modules/local/concat_h5ad.nf' +include { ANNDATAR_CONVERT } from '../../modules/local/anndatar_convert' workflow MTX_CONVERSION { take: mtx_matrices samplesheet - txp2gene - star_index main: ch_versions = Channel.empty() - // Cellranger module output contains too many files which cause path collisions, we filter to the ones we need. - // Keeping backwards compatibility with cellranger-arc. - // TODO: Adapt cellranger-arc subworkflow like cellranger to remove this snippet here. 
- if (params.aligner in [ 'cellrangerarc' ]) { - mtx_matrices = mtx_matrices.map { meta, mtx_files -> - [ meta, mtx_files.findAll { it.toString().contains("filtered_feature_bc_matrix") } ] - } - .filter { meta, mtx_files -> mtx_files } // Remove any that are missing the relevant files - } - // - // Convert matrix to h5ad + // MODULE: Convert to Rds with AnndataR package // - MTX_TO_H5AD ( - mtx_matrices, - txp2gene, - star_index + ANNDATAR_CONVERT ( + mtx_matrices ) // // Concat sample-specific h5ad in one // - ch_concat_h5ad_input = MTX_TO_H5AD.out.h5ad.groupTuple() // gather all sample-specific files / per type + ch_concat_h5ad_input = mtx_matrices.groupTuple() // gather all sample-specific files / per type if (params.aligner == 'kallisto' && params.kb_workflow != 'standard') { // when having spliced / unspliced matrices, the collected tuple has two levels ( [[mtx_1, mtx_2]] ) // which nextflow break because it is not a valid 'path' thus, we have to remove one level // making it as [ mtx_1, mtx_2 ] ch_concat_h5ad_input = ch_concat_h5ad_input.map{ type, matrices -> [ type, matrices.flatten().toList() ] } } + CONCAT_H5AD ( ch_concat_h5ad_input, samplesheet ) - // - // Convert matrix do seurat - // - MTX_TO_SEURAT ( - mtx_matrices - ) - //TODO CONCAT h5ad and MTX to h5ad should also have versions.yaml output - ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions) + // ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions, MTX_TO_SEURAT.out.versions) emit: ch_versions diff --git a/subworkflows/local/starsolo.nf b/subworkflows/local/starsolo.nf index 0c11acd1..8236632d 100644 --- a/subworkflows/local/starsolo.nf +++ b/subworkflows/local/starsolo.nf @@ -1,5 +1,6 @@ /* -- IMPORT LOCAL MODULES/SUBWORKFLOWS -- */ -include { STAR_ALIGN } from '../../modules/local/star_align' +include { STAR_ALIGN } from '../../modules/local/star_align' +include { MTX_TO_H5AD_STAR as MTX_TO_H5AD } from '../../modules/local/mtx_to_h5ad_star' /* -- IMPORT 
NF-CORE MODULES/SUBWORKFLOWS -- */ include { GUNZIP } from '../../modules/nf-core/gunzip/main' @@ -53,14 +54,21 @@ workflow STARSOLO { ) ch_versions = ch_versions.mix(STAR_ALIGN.out.versions) + /* + * Perform h5ad conversion + */ + MTX_TO_H5AD ( + STAR_ALIGN.out.raw_counts.mix( STAR_ALIGN.out.filtered_counts ), + star_index.map{ meta, index -> index } + ) + ch_versions = ch_versions.mix(MTX_TO_H5AD.out.versions.first()) + emit: ch_versions // get rid of meta for star index - star_index = star_index.map{ meta, index -> index } star_result = STAR_ALIGN.out.tab star_counts = STAR_ALIGN.out.counts - raw_counts = STAR_ALIGN.out.raw_counts - filtered_counts = STAR_ALIGN.out.filtered_counts + star_h5ad = MTX_TO_H5AD.out.h5ad for_multiqc = STAR_ALIGN.out.log_final.map{ meta, it -> it } } diff --git a/workflows/scrnaseq.nf b/workflows/scrnaseq.nf index 10ced221..1e7ea9ef 100644 --- a/workflows/scrnaseq.nf +++ b/workflows/scrnaseq.nf @@ -3,13 +3,13 @@ include { FASTQC_CHECK } from '../subworkflows/local/fastq include { KALLISTO_BUSTOOLS } from '../subworkflows/local/kallisto_bustools' include { SCRNASEQ_ALEVIN } from '../subworkflows/local/alevin' include { STARSOLO } from '../subworkflows/local/starsolo' -include { CELLRANGER_ALIGN } from "../subworkflows/local/align_cellranger" -include { CELLRANGER_MULTI_ALIGN } from "../subworkflows/local/align_cellrangermulti" -include { CELLRANGERARC_ALIGN } from "../subworkflows/local/align_cellrangerarc" -include { UNIVERSC_ALIGN } from "../subworkflows/local/align_universc" -include { MTX_CONVERSION } from "../subworkflows/local/mtx_conversion" +include { CELLRANGER_ALIGN } from '../subworkflows/local/align_cellranger' +include { CELLRANGER_MULTI_ALIGN } from '../subworkflows/local/align_cellrangermulti' +include { CELLRANGERARC_ALIGN } from '../subworkflows/local/align_cellrangerarc' +include { UNIVERSC_ALIGN } from '../subworkflows/local/align_universc' +include { EMPTY_DROPLET_REMOVAL } from 
'../subworkflows/local/emptydrops_removal' +include { MTX_CONVERSION } from '../subworkflows/local/mtx_conversion' include { GTF_GENE_FILTER } from '../modules/local/gtf_gene_filter' -include { EMPTYDROPS_CELL_CALLING } from '../modules/local/emptydrops' include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main' include { GUNZIP as GUNZIP_GTF } from '../modules/nf-core/gunzip/main' include { paramsSummaryMultiqc } from '../subworkflows/nf-core/utils_nfcore_pipeline' @@ -86,8 +86,8 @@ workflow SCRNASEQ { ch_multi_samplesheet = params.cellranger_multi_barcodes ? file(params.cellranger_multi_barcodes, checkIfExists: true) : [] empty_file = file("$projectDir/assets/EMPTY", checkIfExists: true) - ch_versions = Channel.empty() - ch_mtx_matrices = Channel.empty() + ch_versions = Channel.empty() + ch_h5ad_matrices = Channel.empty() // Run FastQC if (!params.skip_fastqc) { @@ -137,7 +137,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(KALLISTO_BUSTOOLS.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts) + ch_h5ad_matrices = ch_h5ad_matrices.mix(KALLISTO_BUSTOOLS.out.raw_counts, KALLISTO_BUSTOOLS.out.filtered_counts) ch_txp2gene = KALLISTO_BUSTOOLS.out.txp2gene } @@ -155,7 +155,7 @@ workflow SCRNASEQ { ) ch_versions = ch_versions.mix(SCRNASEQ_ALEVIN.out.ch_versions) ch_multiqc_files = ch_multiqc_files.mix(SCRNASEQ_ALEVIN.out.alevin_results.map{ meta, it -> it }) - ch_mtx_matrices = ch_mtx_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results) + ch_h5ad_matrices = ch_h5ad_matrices.mix(SCRNASEQ_ALEVIN.out.alevin_results) } // Run STARSolo pipeline @@ -171,9 +171,8 @@ workflow SCRNASEQ { protocol_config.get('extra_args', ""), ) ch_versions = ch_versions.mix(STARSOLO.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(STARSOLO.out.raw_counts, STARSOLO.out.filtered_counts) - ch_star_index = STARSOLO.out.star_index ch_multiqc_files = 
ch_multiqc_files.mix(STARSOLO.out.for_multiqc) + ch_h5ad_matrices = STARSOLO.out.star_h5ad } // Run cellranger pipeline @@ -186,7 +185,6 @@ workflow SCRNASEQ { protocol_config['protocol'] ) ch_versions = ch_versions.mix(CELLRANGER_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_ALIGN.out.cellranger_matrices) ch_star_index = CELLRANGER_ALIGN.out.star_index ch_multiqc_files = ch_multiqc_files.mix(CELLRANGER_ALIGN.out.cellranger_out.map{ meta, outs -> outs.findAll{ it -> it.name == "web_summary.html"} @@ -203,7 +201,7 @@ workflow SCRNASEQ { ch_fastq ) ch_versions = ch_versions.mix(UNIVERSC_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(UNIVERSC_ALIGN.out.universc_out) + ch_h5ad_matrices = ch_h5ad_matrices.mix(UNIVERSC_ALIGN.out.universc_out) } // Run cellrangerarc pipeline @@ -217,7 +215,7 @@ workflow SCRNASEQ { ch_cellrangerarc_config ) ch_versions = ch_versions.mix(CELLRANGERARC_ALIGN.out.ch_versions) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out) + ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGERARC_ALIGN.out.cellranger_arc_out) } // Run cellrangermulti pipeline @@ -285,38 +283,34 @@ workflow SCRNASEQ { ch_multiqc_files = ch_multiqc_files.mix( CELLRANGER_MULTI_ALIGN.out.cellrangermulti_out.map{ meta, outs -> outs.findAll{ it -> it.name == "web_summary.html" } }) - ch_mtx_matrices = ch_mtx_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx) + ch_h5ad_matrices = ch_h5ad_matrices.mix(CELLRANGER_MULTI_ALIGN.out.cellrangermulti_mtx) } - // Run emptydrops calling module + // SUBWORKFLOW: Run cellbender emptydrops filter if ( !params.skip_emptydrops && !(params.aligner in ['cellrangerarc']) ) { // // emptydrops should only run on the raw matrices thus, filter-out the filtered result of the aligners that can produce it // if ( params.aligner in [ 'cellranger', 'cellrangermulti', 'kallisto', 'star' ] ) { - ch_mtx_matrices_for_emptydrops = - ch_mtx_matrices.filter { meta, mtx_files -> - 
mtx_files.toString().contains("raw_feature_bc_matrix") || // cellranger - mtx_files.toString().contains("counts_unfiltered") || // kallisto - mtx_files.toString().contains("raw") // star - } + ch_h5ad_matrices_for_emptydrops = + ch_h5ad_matrices.filter { meta, mtx_files -> meta.input_type == 'raw' } } else { - ch_mtx_matrices_for_emptydrops = ch_mtx_matrices + ch_h5ad_matrices_for_emptydrops = ch_h5ad_matrices } - EMPTYDROPS_CELL_CALLING( ch_mtx_matrices_for_emptydrops ) - ch_mtx_matrices = ch_mtx_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices ) + EMPTY_DROPLET_REMOVAL ( + ch_h5ad_matrices_for_emptydrops + ) + // ch_h5ad_matrices = ch_h5ad_matrices.mix( EMPTYDROPS_CELL_CALLING.out.filtered_matrices ) } // Run mtx to h5ad conversion subworkflow MTX_CONVERSION ( - ch_mtx_matrices, - ch_input, - ch_txp2gene, - ch_star_index + ch_h5ad_matrices, + ch_input ) //Add Versions from MTX Conversion workflow too