diff --git a/.nf-core.yml b/.nf-core.yml new file mode 100644 index 0000000..3805dc8 --- /dev/null +++ b/.nf-core.yml @@ -0,0 +1 @@ +repository_type: pipeline diff --git a/MANIFEST.in b/MANIFEST.in index 30df624..8595e41 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -3,7 +3,7 @@ recursive-include bin recursive-include conf recursive-include assets recursive-include modules -recursive-include submodules +recursive-include subworkflows include CITATION.cff include LICENSE include VERSION diff --git a/main.nf b/main.nf index bafe17f..b0ab3cb 100644 --- a/main.nf +++ b/main.nf @@ -17,10 +17,10 @@ input : ${params.input} .stripIndent() // SUBMODULES -include { INPUT_CHECK } from './submodules/local/input_check.nf' -include { TRIM_COUNT } from './submodules/local/trim_count.nf' -include { MAGECK } from './submodules/local/mageck.nf' -include { BAGEL } from './submodules/local/bagel.nf' +include { INPUT_CHECK } from './subworkflows/local/input_check.nf' +include { TRIM_COUNT } from './subworkflows/local/trim_count.nf' +include { MAGECK } from './subworkflows/local/mageck.nf' +include { BAGEL } from './subworkflows/local/bagel.nf' // MODULES include { DRUGZ } from './modules/local/drugz.nf' diff --git a/modules.json b/modules.json new file mode 100644 index 0000000..a006ca4 --- /dev/null +++ b/modules.json @@ -0,0 +1,17 @@ +{ + "name": "CCBR/CRUISE", + "homePage": "https://github.com/CCBR/CRUISE", + "repos": { + "https://github.com/CCBR/nf-modules": { + "modules": { + "CCBR": { + "cutadapt": { + "branch": "main", + "git_sha": "a3525c765b76400269695235bbf164a948604c4a", + "installed_by": ["modules"] + } + } + } + } + } +} diff --git a/modules/CCBR/cutadapt/main.nf b/modules/CCBR/cutadapt/main.nf new file mode 100644 index 0000000..775ca2c --- /dev/null +++ b/modules/CCBR/cutadapt/main.nf @@ -0,0 +1,65 @@ +process CUTADAPT { + tag { meta.id } + label 'process_high' + + container 'nciccbr/ncigb_cutadapt_v1.18:latest' + + input: + tuple val(meta), path(reads) + + output: + 
tuple val(meta), path('*.trim.fastq.gz'), emit: reads + tuple val(meta), path('*.log') , emit: log + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "-o ${prefix}.trim.fastq.gz" : "-o ${prefix}_1.trim.fastq.gz -p ${prefix}_2.trim.fastq.gz" + def args = task.ext.args ?: [ + '--nextseq-trim=2', + '--trim-n -n 5 -O 5', + '-q 10,10', + '-b file:/opt2/TruSeq_and_nextera_adapters.consolidated.fa' + ] + // ext.args may be configured as a String/GString or a List; normalise to a List so the appends and join below behave the same for both + if (args instanceof CharSequence) { args = [args.toString()] } + if (meta.single_end) { + args += [ + '-m 20' + ] + } else { + args += [ + '-B file:/opt2/TruSeq_and_nextera_adapters.consolidated.fa', + '-m 20:20', + ] + } + args = args.join(' ').trim() + """ + cutadapt \\ + --cores ${task.cpus} \\ + ${args} \\ + ${trimmed} \\ + ${reads} \\ + > ${prefix}.cutadapt.log + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + def trimmed = meta.single_end ? "${prefix}.trim.fastq.gz" : "${prefix}_1.trim.fastq.gz ${prefix}_2.trim.fastq.gz" + """ + touch ${prefix}.cutadapt.log + touch ${trimmed} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + cutadapt: \$(cutadapt --version) + END_VERSIONS + """ +} diff --git a/modules/CCBR/cutadapt/meta.yml b/modules/CCBR/cutadapt/meta.yml new file mode 100644 index 0000000..b15b1af --- /dev/null +++ b/modules/CCBR/cutadapt/meta.yml @@ -0,0 +1,45 @@ +name: cutadapt +description: Perform adapter/quality trimming on sequencing reads. Adapted from the nf-core cutadapt module. +keywords: + - trimming + - adapter trimming + - adapters + - quality trimming +tools: + - cutadapt: + description: | + Cutadapt finds and removes adapter sequences, primers, poly-A tails and other types of unwanted sequence from your high-throughput sequencing reads. 
+ documentation: https://cutadapt.readthedocs.io/en/stable/index.html + doi: 10.14806/ej.17.1.200 + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: | + List of input FastQ files of size 1 and 2 for single-end and paired-end data, + respectively. +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - reads: + type: file + description: The trimmed/modified fastq reads + pattern: "*fastq.gz" + - log: + type: file + description: cutadapt log file + pattern: "*cutadapt.log" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@kelly-sovacool" diff --git a/modules/local/trim.nf b/modules/local/trim.nf deleted file mode 100644 index ad3a883..0000000 --- a/modules/local/trim.nf +++ /dev/null @@ -1,41 +0,0 @@ -// TODO move this to shared CCBR modules repo -process TRIM_SE { - tag { meta.id } - label 'qc' - label 'process_high' - container "${params.containers.cutadapt}" - - input: - tuple val(meta), path(fastq) - - output: - tuple val(meta), path("*.fastq.gz"), emit: reads - - script: - def prefix = task.ext.prefix ?: "${meta.id}" - if (meta.single_end) { - """ - nseqs_raw=\$(zgrep "^@" ${fastq} | wc -l) - echo "\$nseqs_raw in ${fastq}" - cutadapt \ - --nextseq-trim=2 \ - --trim-n \ - -n 5 -O 5 \ - -q ${params.cutadapt.leadingquality},${params.cutadapt.trailingquality} \ - -m ${params.cutadapt.minlen} \ - -b file:${params.cutadapt.adapters} \ - -j $task.cpus \ - $fastq |\ - pigz -p ${task.cpus} > ${prefix}.trimmed.fastq.gz - nseqs_trimmed=\$(zgrep "^@" ${prefix}.trimmed.fastq.gz | wc -l) - echo "\$nseqs_trimmed in ${prefix}.trimmed.fastq.gz" - """ - } else { - "paired end reads are not supported yet" - } - - stub: - """ - touch ${meta.id}.trimmed.fastq.gz - """ -} diff --git a/pyproject.toml 
b/pyproject.toml index 208c6f8..4f8f1bc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,7 +61,7 @@ Changelog = "https://github.com/CCBR/CRUISE/blob/main/docs/CHANGELOG.md" cruise = "." [tool.setuptools.package-data] -"*" = ["CITATION.cff", "LICENSE", "VERSION", "main.nf", "nextflow.config", "assets/*", "bin/*", "conf/*", "modules/*/*", "submodules/*/*"] +"*" = ["CITATION.cff", "LICENSE", "VERSION", "main.nf", "nextflow.config", "assets/*", "bin/*", "conf/*", "modules/*/*", "modules/*/*/*", "subworkflows/*/*"] [tool.setuptools.dynamic] version = {file = "VERSION"} diff --git a/submodules/local/bagel.nf b/subworkflows/local/bagel.nf similarity index 100% rename from submodules/local/bagel.nf rename to subworkflows/local/bagel.nf diff --git a/submodules/local/input_check.nf b/subworkflows/local/input_check.nf similarity index 100% rename from submodules/local/input_check.nf rename to subworkflows/local/input_check.nf diff --git a/submodules/local/mageck.nf b/subworkflows/local/mageck.nf similarity index 100% rename from submodules/local/mageck.nf rename to subworkflows/local/mageck.nf diff --git a/submodules/local/trim_count.nf b/subworkflows/local/trim_count.nf similarity index 72% rename from submodules/local/trim_count.nf rename to subworkflows/local/trim_count.nf index 9847f00..ef12f53 100644 --- a/submodules/local/trim_count.nf +++ b/subworkflows/local/trim_count.nf @@ -1,4 +1,4 @@ -include { TRIM_SE } from '../../modules/local/trim.nf' +include { CUTADAPT } from '../../modules/CCBR/cutadapt' include { COUNT as MAGECK_COUNT } from "../../modules/local/mageck.nf" workflow TRIM_COUNT { @@ -7,8 +7,8 @@ workflow TRIM_COUNT { library main: - TRIM_SE(raw_reads) - TRIM_SE.out.reads + CUTADAPT(raw_reads) + CUTADAPT.out.reads .multiMap { meta, fastq -> id: meta.id fastq: fastq @@ -22,5 +22,5 @@ workflow TRIM_COUNT { emit: count = MAGECK_COUNT.out.count - trimmed_reads = TRIM_SE.out.reads + trimmed_reads = CUTADAPT.out.reads }