From 75a23400b7a736ec140b76870ea82595b71258c2 Mon Sep 17 00:00:00 2001 From: yangliu Date: Tue, 1 Jul 2025 17:57:22 +0800 Subject: [PATCH 1/8] usc config --- conf/examples/test_dorado.config | 4 +-- conf/executors/usc.config | 53 ++++++++++++++++++++++++++++++++ nextflow.config | 2 ++ 3 files changed, 57 insertions(+), 2 deletions(-) create mode 100755 conf/executors/usc.config diff --git a/conf/examples/test_dorado.config b/conf/examples/test_dorado.config index c3c93ccc..f0912be3 100644 --- a/conf/examples/test_dorado.config +++ b/conf/examples/test_dorado.config @@ -1,10 +1,10 @@ /* * ------------------------------------------------- - * Nextflow config file for CI human test case + * Nextflow config file for CI human test case for Dorado * ------------------------------------------------- * Defines bundled input files and everything required * to run a fast and simple test. Use as follows: - * nextflow run LabShengLi/nanome -profile test, + * nextflow run LabShengLi/nanome -profile test_dorado, */ params{ diff --git a/conf/executors/usc.config b/conf/executors/usc.config new file mode 100755 index 00000000..0dcd5fab --- /dev/null +++ b/conf/executors/usc.config @@ -0,0 +1,53 @@ +/* + * ------------------------------------------------- + * Nextflow default input from USC CARC HPC + * ------------------------------------------------- + * Defines bundled specific input data from USC CARC HPC + * + */ +params{ + max_cpus = 16 + max_memory = 128.GB + + gpu_queue = 'gpu' + // gpu_qos = null + gpu_processors = 4 + gpu_memory = '64.GB' + gpu_time = '2.d' + gpu_gresOptions = 'gpu:a100:1' + + cpu_queue = 'main' + // cpu_qos = null + cpu_processors = 8 + cpu_memory = '128.GB' + cpu_time = '2.d' + + queueSize = 24 + // used for singularity identify both file systems + containerOptions = '--nv -B /project/sli68423_1316 -B /scratch1/yliu8962' +} + +process{ + executor = "slurm" + // module = "slurm:singularity" + containerOptions = params.containerOptions + + withName: 
'UNTAR|ALIGNMENT|QCEXPORT|RESQUIGGLE|NANOPOLISH|Tombo|Guppy6Comb|METEORE|CLAIR3|PHASING|CONSENSUS|EVAL|REPORT|NPLSHCOMB|MGLDNCOMB|DPSIGCOMB|DEEPSIGNAL2COMB|GuppyComb|TomboComb|DpmodComb|Guppy6Comb' { + queue = params.cpu_queue + cpus = params.cpu_processors + memory = params.cpu_memory + time = params.cpu_time + } + + withName: 'ENVCHECK|BASECALL|MEGALODON|Guppy6|Guppy|DEEPSIGNAL|DEEPSIGNAL2|DeepMod|METEORE|DORADO_CALL' { + queue = params.gpu_queue + cpus = params.gpu_processors + memory = params.gpu_memory + time = params.gpu_time + clusterOptions = " ${params.gpu_gresOptions ? '--gres=' + params.gpu_gresOptions : ' '}" + } +} + +executor { + queueSize = params.queueSize +} diff --git a/nextflow.config b/nextflow.config index f228c094..6b46e287 100755 --- a/nextflow.config +++ b/nextflow.config @@ -266,6 +266,8 @@ profiles { jax { includeConfig 'conf/executors/jaxhpc_input.config' } + usc { includeConfig 'conf/executors/usc.config' } + conda { process.conda = params.conda_name conda.cacheDir = params.conda_cache From a3751aaf3b977ca8dbd6b2b8d1c349dbd93d09ba Mon Sep 17 00:00:00 2001 From: yangliu Date: Thu, 3 Jul 2025 09:33:40 +0800 Subject: [PATCH 2/8] update tombo default --- README.md | 14 +++++++++----- nextflow.config | 3 ++- 2 files changed, 11 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 3ccd32ee..1828b40b 100755 --- a/README.md +++ b/README.md @@ -339,11 +339,15 @@ Please check [NANOME report](https://github.com/LabShengLi/nanome/blob/master/do Please check [phasing usage](https://github.com/LabShengLi/nanome/blob/master/docs/Phasing.md). ![PhasingDemo](https://github.com/LabShengLi/nanome/blob/master/docs/resources/nanome3t_5mc_phasing2.png) -### Lifebit CloudOS report -We now support running NANOME on cloud computing platform. 
[Lifebit](https://lifebit.ai/lifebit-cloudos/) is a web-based cloud computing platform, and below is the running reports: -* Ecoli test report: https://cloudos.lifebit.ai/public/jobs/6430509445941801546e5f8f -* Human test report: https://cloudos.lifebit.ai/public/jobs/6430639045941801546e627f -* NA12878 chr22 report: https://cloudos.lifebit.ai/public/jobs/6430b64645941801546e7400 +[//]: # (### Lifebit CloudOS report) + +[//]: # (We now support running NANOME on cloud computing platform. [Lifebit](https://lifebit.ai/lifebit-cloudos/) is a web-based cloud computing platform, and below is the running reports:) + +[//]: # (* Ecoli test report: https://cloudos.lifebit.ai/public/jobs/6430509445941801546e5f8f) + +[//]: # (* Human test report: https://cloudos.lifebit.ai/public/jobs/6430639045941801546e627f) + +[//]: # (* NA12878 chr22 report: https://cloudos.lifebit.ai/public/jobs/6430b64645941801546e7400) ## Revision History diff --git a/nextflow.config b/nextflow.config index 6b46e287..81c1f672 100755 --- a/nextflow.config +++ b/nextflow.config @@ -165,7 +165,8 @@ params { BasecallGroupName = "Basecall_1D_000" // Basecall ID name used by resquiggle BasecallSubGroupName = "BaseCalled_template" ResquiggleCorrectedGroup = "RawGenomeCorrected_000" - tomboResquiggleOptions = null // '--signal-length-range 0 500000 --sequence-length-range 0 50000', ref: tombo resquiggle --print-advanced-arguments + // ref: https://github.com/nanoporetech/tombo/issues/167 + tomboResquiggleOptions = '--signal-length-range 0 500000 --sequence-length-range 0 50000' // null, or '--signal-length-range 0 500000 --sequence-length-range 0 50000', ref: tombo resquiggle --print-advanced-arguments tomboMultiprocessRegionSize = 1000 // tombo methylation calling options tomboThreadsPerProcess = 1 // --threads-per-process , set to 4 or more for faster, but danger for memory issues feature_extract = false // if extract tombo resquiggle features using deepsignal v1 From fc9c9f715f765101dc8a6692d004d13f8be382da 
Mon Sep 17 00:00:00 2001 From: yangliu Date: Sat, 5 Jul 2025 10:29:50 +0800 Subject: [PATCH 3/8] update multi inputs for dorado --- conf/examples/test_dorado.config | 2 +- modules/DORADO.nf | 32 +++++++++++++++++--------------- 2 files changed, 18 insertions(+), 16 deletions(-) diff --git a/conf/examples/test_dorado.config b/conf/examples/test_dorado.config index f0912be3..1a66d618 100644 --- a/conf/examples/test_dorado.config +++ b/conf/examples/test_dorado.config @@ -28,7 +28,7 @@ process { maxRetries = params.maxRetries withName: 'ENVCHECK' { - // allow retry if download Rerio model failed + // allow retry if download files failed errorStrategy = {task.attempt >= process.maxRetries ? params.errorStrategy : 'retry' } } } diff --git a/modules/DORADO.nf b/modules/DORADO.nf index 8166bae1..c070c90e 100644 --- a/modules/DORADO.nf +++ b/modules/DORADO.nf @@ -34,21 +34,23 @@ process DORADO_UNTAR { ## Extract input files tar/tar.gz/folder mkdir -p untarTempDir - fast5Input=!{fast5InputList} - if [[ $fast5Input == *.tar && -f ${fast5Input} ]] ; then - ### deal with tar - tar -xf ${fast5Input} -C untarTempDir - elif [[ ${fast5Input} == *.tar.gz && -f ${fast5Input} ]] ; then - ### deal with tar.gz - tar -xzf ${fast5Input} -C untarTempDir - elif [[ -d ${fast5Input} ]]; then - ## For dir, should copy files, we do not want to change original files such as old analyses data in fast5 - find ${fast5Input}/ -name "*.!{params.file_format}" | \ - parallel -j!{cores} cp -L -f {} untarTempDir/ - else - echo "### Untar error for input=${fast5Input}" - fi - + for fast5Input in !{fast5InputList}; do + echo "Unpacking $fast5Input..." 
+ ## fast5Input=!{fast5InputList} + if [[ $fast5Input == *.tar && -f ${fast5Input} ]] ; then + ### deal with tar + tar -xf ${fast5Input} -C untarTempDir + elif [[ ${fast5Input} == *.tar.gz && -f ${fast5Input} ]] ; then + ### deal with tar.gz + tar -xzf ${fast5Input} -C untarTempDir + elif [[ -d ${fast5Input} ]]; then + ## For dir, should copy files, we do not want to change original files such as old analyses data in fast5 + find ${fast5Input}/ -name "*.!{params.file_format}" | \ + parallel -j!{cores} cp -L -f {} untarTempDir/ + else + echo "### Untar error for input=${fast5Input}" + fi + done ## Move fast5 raw/basecalled files into XXX.untar folder mkdir -p !{params.dsname}.untar From 4fe708c31dae343814c664b17eb0a00593666ebe Mon Sep 17 00:00:00 2001 From: yangliu Date: Sat, 5 Jul 2025 10:46:35 +0800 Subject: [PATCH 4/8] update bam output as default --- nextflow.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow.config b/nextflow.config index 81c1f672..fc898a59 100755 --- a/nextflow.config +++ b/nextflow.config @@ -128,7 +128,7 @@ params { outputIntermediate = false // if keep each batch outputs outputRaw = true // if output the raw combined outputs for each tool's format outputGenomeBrowser = false - outputBam = false // if output basecalled merged bam + outputBam = true // if output basecalled merged bam outputONTCoverage = false // if output ONT coverage // meth type From 9482a4c0e6b5134d27c269876a0461163071817e Mon Sep 17 00:00:00 2001 From: yangliu Date: Thu, 2 Oct 2025 12:46:17 -0700 Subject: [PATCH 5/8] add bam as input, skip basecall and methylation call --- main.nf | 13 +++++++++---- modules/DORADO.nf | 31 +++++++++++++++++++++++++++++-- nextflow.config | 1 + 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/main.nf b/main.nf index a661c565..64678348 100755 --- a/main.nf +++ b/main.nf @@ -336,14 +336,19 @@ workflow { if (params.dorado) { // Dorado ecosystems // ch_inputs.collect().view() + if (!params.input_bam) 
{ + DORADO_UNTAR(ch_inputs.collect()) + DORADO_CALL(DORADO_UNTAR.out.untar, ENVCHECK.out.reference_genome) + dorado_call = DORADO_CALL.out.dorado_call + } else { + dorado_call = ch_inputs.collect() + } - DORADO_UNTAR(ch_inputs.collect()) - DORADO_CALL(DORADO_UNTAR.out.untar, ENVCHECK.out.reference_genome) - DORADO_QC(DORADO_CALL.out.dorado_call, ENVCHECK.out.reference_genome) + DORADO_QC(dorado_call, ENVCHECK.out.reference_genome) bam_fn = "${params.dsname}.dorado_call/${params.dsname}.dorado_call.bam" DORADO_CALL_EXTRACT("per_read", bam_fn, - DORADO_CALL.out.dorado_call, ENVCHECK.out.reference_genome, + dorado_call, ENVCHECK.out.reference_genome, ch_src, ch_utils) UNIFY("Dorado","NANOME", "all", diff --git a/modules/DORADO.nf b/modules/DORADO.nf index c070c90e..af37e99d 100644 --- a/modules/DORADO.nf +++ b/modules/DORADO.nf @@ -136,12 +136,39 @@ process DORADO_QC { samtools_cores = task.cpus * params.mediumProcTimes ''' - mkdir -p !{params.dsname}_dorado_qc + mkdir -p !{params.dsname}.dorado_qc - NanoComp --bam !{dorado_call}/!{params.dsname}.dorado_call.bam \ + bam_file=!{dorado_call}/*.bam + NanoComp --bam ${bam_file} \ --names !{params.dsname} --outdir !{params.dsname}.dorado_qc -t !{cores} \ --raw -f pdf -p !{params.dsname}_ &>> !{params.dsname}.DORADO_QC.run.log + ## extract QC metrics to tsv + infer_input=(!{params.dsname}.dorado_qc/*_NanoStats.txt) + + input="${infer_input[0]}" + output=!{params.dsname}.dorado_qc/!{params.dsname}_NanoStatsQC.tsv + + # Extract only the top 13 lines (the summary section) + # Parse top 13 lines into one row (header + values) + head -n 13 -- "$input" | \ + awk -F: -v output="$output" ' + { + key=$1 + gsub(/^[ \t]+|[ \t]+$/, "", key) # trim + gsub(/ /, "_", key) # spaces -> _ + val=$2 + gsub(/,/, "", val) # remove commas in numbers + gsub(/^[ \t]+|[ \t]+$/, "", val) # trim + keys = keys ? keys "\t" key : key + values = values ? 
values "\t" val : val + } + END { + print keys > output # header (overwrite) + print values >> output # one row of values (append) + }' + echo "TSV file created: $output" + echo "### Dorado QC all DONE" ''' } diff --git a/nextflow.config b/nextflow.config index fc898a59..0771affc 100755 --- a/nextflow.config +++ b/nextflow.config @@ -49,6 +49,7 @@ params { input = null outdir = "results" pod5 = false + input_bam = false // Dorado dorado = false From 7d7210757dfd639b9416fdff07bc19c9ab219714 Mon Sep 17 00:00:00 2001 From: yangliu Date: Thu, 2 Oct 2025 15:44:27 -0700 Subject: [PATCH 6/8] add kit option --- main.nf | 24 ++++++++++++++---------- modules/DORADO.nf | 20 +++++++++++++++----- nextflow.config | 1 + 3 files changed, 30 insertions(+), 15 deletions(-) diff --git a/main.nf b/main.nf index 64678348..b700f86c 100755 --- a/main.nf +++ b/main.nf @@ -333,7 +333,6 @@ workflow { // environment check ENVCHECK(ch_genome, ch_utils, ch_rerio_dir, ch_deepsignal_dir) - if (params.dorado) { // Dorado ecosystems // ch_inputs.collect().view() if (!params.input_bam) { @@ -344,17 +343,22 @@ workflow { dorado_call = ch_inputs.collect() } - DORADO_QC(dorado_call, ENVCHECK.out.reference_genome) + if (params.kit_name == null) { + DORADO_QC(dorado_call, ENVCHECK.out.reference_genome) + + // bam_fn = "${params.dsname}.dorado_call/${params.dsname}.dorado_call.bam" + // extract per read + DORADO_CALL_EXTRACT("per_read", + dorado_call, ENVCHECK.out.reference_genome, + ch_src, ch_utils) - bam_fn = "${params.dsname}.dorado_call/${params.dsname}.dorado_call.bam" - DORADO_CALL_EXTRACT("per_read", bam_fn, - dorado_call, ENVCHECK.out.reference_genome, - ch_src, ch_utils) + // convert to per site + UNIFY("Dorado","NANOME", "all", + DORADO_CALL_EXTRACT.out.dorado_call_extract, + ENVCHECK.out.reference_genome, + ch_src, ch_utils) + } - UNIFY("Dorado","NANOME", "all", - DORADO_CALL_EXTRACT.out.dorado_call_extract, - ENVCHECK.out.reference_genome, - ch_src, ch_utils) } else { // Guppy ecosystems if 
(params.runBasecall) { UNTAR(ch_inputs) diff --git a/modules/DORADO.nf b/modules/DORADO.nf index af37e99d..15d40598 100644 --- a/modules/DORADO.nf +++ b/modules/DORADO.nf @@ -85,13 +85,13 @@ process DORADO_CALL { shell: cores = task.cpus * params.highProcTimes + kitOpt = params.kit_name ? "--kit-name ${params.kit_name}" : "" ''' date; hostname; pwd - echo untar_dir=!{untar_dir} echo reference_genome=!{reference_genome} -`` + echo !{kitOpt} dorado -vv ls /models @@ -101,6 +101,7 @@ process DORADO_CALL { dorado basecaller \ !{params.dorado_model_dir}/!{params.dorado_basecall_model} \ !{untar_dir}/ \ + !{kitOpt} \ --models-directory !{params.dorado_model_dir} \ --modified-bases-models !{params.dorado_model_dir}/!{params.dorado_methcall_model} \ -x auto --verbose -r \ @@ -110,6 +111,14 @@ process DORADO_CALL { mv !{params.dsname}.dorado_call/*.bam !{params.dsname}.dorado_call/!{params.dsname}.dorado_call.bam mv !{params.dsname}.dorado_call/*.bam.bai !{params.dsname}.dorado_call/!{params.dsname}.dorado_call.bam.bai + + if [[ "!{params.kit_name}" != "null" ]]; then + mkdir -p !{params.dsname}.dorado_call_demux_!{params.kit_name} + dorado demux \ + --output-dir !{params.dsname}.dorado_call_demux_!{params.kit_name} \ + --no-classify !{params.dsname}.dorado_call/!{params.dsname}.dorado_call.bam -v \ + 2> >(tee -a !{params.dsname}_dorado_demux.run.log) + fi echo "### Dorado call DONE" ''' } @@ -184,7 +193,7 @@ process DORADO_CALL_EXTRACT { input: val call_tagname // call's tagname - val bam_fn // BAM file location + // val bam_fn // BAM file location path dorado_call // can be a folder contains files of BAM and BAM.BAI path reference_genome path ch_src @@ -202,12 +211,13 @@ process DORADO_CALL_EXTRACT { date; hostname; pwd echo !{call_tagname} - echo !{bam_fn} echo !{dorado_call} + bam_fn=!{dorado_call}/*.bam + python utils/modbam2bed_extract_read_cpg.py \ -r !{params.referenceGenome} \ - -i !{bam_fn} \ + -i ${bam_fn} \ -o 
!{params.dsname}.!{call_tagname}.dorado_call_extract.tsv \ -a !{params.guppy_canon_threshold} -b !{params.guppy_mod_threshold} diff --git a/nextflow.config b/nextflow.config index 0771affc..7e48f88a 100755 --- a/nextflow.config +++ b/nextflow.config @@ -50,6 +50,7 @@ params { outdir = "results" pod5 = false input_bam = false + kit_name = null // kit_name="SQK-RBK114-24" // Dorado dorado = false From 1f1e59e4ec7bb09b8209a861282c04396c980a69 Mon Sep 17 00:00:00 2001 From: yangliu Date: Thu, 2 Oct 2025 17:55:06 -0700 Subject: [PATCH 7/8] add kit option --- conf/executors/usc.config | 4 +- main.nf | 10 ++- modules/DORADO.nf | 128 +++++++++++++++++++++++++++++++++++--- nextflow.config | 9 +-- 4 files changed, 134 insertions(+), 17 deletions(-) diff --git a/conf/executors/usc.config b/conf/executors/usc.config index 0dcd5fab..d816f0b1 100755 --- a/conf/executors/usc.config +++ b/conf/executors/usc.config @@ -32,14 +32,14 @@ process{ // module = "slurm:singularity" containerOptions = params.containerOptions - withName: 'UNTAR|ALIGNMENT|QCEXPORT|RESQUIGGLE|NANOPOLISH|Tombo|Guppy6Comb|METEORE|CLAIR3|PHASING|CONSENSUS|EVAL|REPORT|NPLSHCOMB|MGLDNCOMB|DPSIGCOMB|DEEPSIGNAL2COMB|GuppyComb|TomboComb|DpmodComb|Guppy6Comb' { + withName: 'ENVCHECK|UNTAR|ALIGNMENT|QCEXPORT|RESQUIGGLE|NANOPOLISH|Tombo|Guppy6Comb|METEORE|CLAIR3|PHASING|CONSENSUS|EVAL|REPORT|NPLSHCOMB|MGLDNCOMB|DPSIGCOMB|DEEPSIGNAL2COMB|GuppyComb|TomboComb|DpmodComb|Guppy6Comb|DORADO_DEMUX' { queue = params.cpu_queue cpus = params.cpu_processors memory = params.cpu_memory time = params.cpu_time } - withName: 'ENVCHECK|BASECALL|MEGALODON|Guppy6|Guppy|DEEPSIGNAL|DEEPSIGNAL2|DeepMod|METEORE|DORADO_CALL' { + withName: 'BASECALL|MEGALODON|Guppy6|Guppy|DEEPSIGNAL|DEEPSIGNAL2|DeepMod|METEORE|DORADO_CALL' { queue = params.gpu_queue cpus = params.gpu_processors memory = params.gpu_memory diff --git a/main.nf b/main.nf index b700f86c..077554f9 100755 --- a/main.nf +++ b/main.nf @@ -303,7 +303,7 @@ include { EVAL } from 
'./modules/EVAL' include { REPORT } from './modules/REPORT' -include { DORADO_UNTAR; DORADO_CALL; DORADO_QC; DORADO_CALL_EXTRACT; UNIFY } from './modules/DORADO' +include { DORADO_UNTAR; DORADO_CALL; DORADO_DEMUX; DORADO_QC; DORADO_CALL_EXTRACT; UNIFY; CLAIR3_dorado } from './modules/DORADO' // place holder channel, used for empty file of a channel null1 = Channel.fromPath("${projectDir}/utils/null1") @@ -343,7 +343,9 @@ workflow { dorado_call = ch_inputs.collect() } - if (params.kit_name == null) { + if (params.demux) { + DORADO_DEMUX(dorado_call) + } else { // demux will not run QC DORADO_QC(dorado_call, ENVCHECK.out.reference_genome) // bam_fn = "${params.dsname}.dorado_call/${params.dsname}.dorado_call.bam" @@ -357,6 +359,10 @@ workflow { DORADO_CALL_EXTRACT.out.dorado_call_extract, ENVCHECK.out.reference_genome, ch_src, ch_utils) + + if (params.phasing) { + CLAIR3_dorado(dorado_call, ENVCHECK.out.reference_genome) + } } } else { // Guppy ecosystems diff --git a/modules/DORADO.nf b/modules/DORADO.nf index 15d40598..86f64985 100644 --- a/modules/DORADO.nf +++ b/modules/DORADO.nf @@ -111,19 +111,46 @@ process DORADO_CALL { mv !{params.dsname}.dorado_call/*.bam !{params.dsname}.dorado_call/!{params.dsname}.dorado_call.bam mv !{params.dsname}.dorado_call/*.bam.bai !{params.dsname}.dorado_call/!{params.dsname}.dorado_call.bam.bai - - if [[ "!{params.kit_name}" != "null" ]]; then - mkdir -p !{params.dsname}.dorado_call_demux_!{params.kit_name} - dorado demux \ - --output-dir !{params.dsname}.dorado_call_demux_!{params.kit_name} \ - --no-classify !{params.dsname}.dorado_call/!{params.dsname}.dorado_call.bam -v \ - 2> >(tee -a !{params.dsname}_dorado_demux.run.log) - fi echo "### Dorado call DONE" ''' } +process DORADO_DEMUX { + tag "${params.dsname}" + + publishDir "${params.outdir}/${params.dsname}-methylation-callings", + pattern: "${params.dsname}.dorado_call_demux*", + mode: "copy" + + input: + path dorado_call // can be a folder/tar/tar.gz file + + output: + // 
demultiplexed BAM output dir + path "${params.dsname}.dorado_call_demux*", emit: dorado_demux, optional: true + + when: + params.dorado + + shell: + cores = task.cpus * params.highProcTimes + kitName = params.kit_name ?: "unknown_kit" // label for the demux dir; avoids a literal "null" suffix + ''' + date; hostname; pwd + + bam_fn=!{dorado_call}/*.bam + + mkdir -p !{params.dsname}.dorado_call_demux_!{kitName} + dorado demux \ + --output-dir !{params.dsname}.dorado_call_demux_!{kitName} \ + --no-classify ${bam_fn} -v \ + 2> >(tee -a !{params.dsname}_dorado_demux.run.log) + echo "### Dorado demux DONE" + ''' +} + + process DORADO_QC { tag "${params.dsname}" @@ -228,7 +255,7 @@ process DORADO_CALL_EXTRACT { } -// extract BAM as per-read results +// convert per-read results to per-site/MethylKit results process UNIFY { tag "${call_tagname}" @@ -289,3 +316,86 @@ process UNIFY { echo "### Unify DONE" ''' } + + +process CLAIR3_dorado { + tag "${params.dsname}" + + publishDir "${params.outdir}/${params.dsname}-phasing", + mode: "copy", pattern: "${params.dsname}_clair3_out" + + publishDir "${params.outdir}/${params.dsname}-phasing", + mode: "copy", pattern: "${params.dsname}_phased_intermediate" + + publishDir "${params.outdir}/${params.dsname}-phasing", + mode: "copy", pattern: "${params.dsname}_phased_bam" + + publishDir "${params.outdir}/${params.dsname}-run-log", + mode: "copy", pattern: "*.run.log" + + input: + path dorado_call + path reference_genome + + output: + path "${params.dsname}_clair3_out", emit: clair3_out_ch, optional: true + path "${params.dsname}_phased_intermediate", emit: phased_intermediate_out_ch, optional: true + path "${params.dsname}_phased_bam", emit: phased_bam_out_ch, optional: true + path "*.Clair3.run.log", optional:true, emit: runlog + + """ + run_clair3.sh --version + + bam_fn=\$(ls ${dorado_call}/*.bam | head -n 1) # resolve the glob here: it never expands inside --bam_fn=... + MODEL_NAME="${params.CLAIR3_MODEL_NAME}" ##"r941_prom_sup_g5014" + + mkdir -p ${params.dsname}_clair3_out + run_clair3.sh \ + --bam_fn=\${bam_fn} \ + --ref_fn=${params.referenceGenome} 
\ + --threads=${task.cpus} \ + --platform="ont" \ + --model_path="/opt/models/\${MODEL_NAME}" \ + --enable_phasing --var_pct_phasing=${params.CLAIR3_var_pct_phasing} \ + --output=${params.dsname}_clair3_out ${params.ctg_name ? "--ctg_name=${params.ctg_name}": " "} \ + &> ${params.dsname}.Clair3.run.log + + echo "### Clair3 for variant calling DONE" + + # haplotag + # run whatshap haplotag + mkdir -p ${params.dsname}_phased_intermediate + phased_vcf_fn=${params.dsname}_clair3_out/phased_merge_output.vcf.gz + tsvFile="${params.dsname}_phased_intermediate/${params.dsname}_whatshap_haplotag_read_list.tsv" + haplotagBamFile="${params.dsname}_phased_intermediate/${params.dsname}_whatshap_haplotag_bam.bam" + + ## Phasing tag extraction for each chromosome + ## older version lacks: --skip-missing-contigs --output-threads ${task.cpus} + whatshap haplotag \ + --ignore-read-groups\ + --reference ${params.referenceGenome} \ + --output-haplotag-list \${tsvFile} \ + -o \${haplotagBamFile} \ + \${phased_vcf_fn} \${bam_fn} \ + 2>&1 | tee -a ${params.dsname}_phased_intermediate/${params.dsname}_whatshap_haplotag.run.log && \ + touch ${params.dsname}_phased_intermediate/${params.dsname}_whatshap_haplotag.done && \ + echo "### DONE for haplotag" + + # split bam + mkdir -p ${params.dsname}_phased_bam + outdir=${params.dsname}_phased_bam + whatshap split \ + --output-h1 \${outdir}/${params.dsname}_split_HP1.bam \ + --output-h2 \${outdir}/${params.dsname}_split_HP2.bam \ + --output-untagged \${outdir}/${params.dsname}_split_untagged.bam \ + \${bam_fn} \ + \${tsvFile} \ + 2>&1 | tee \${outdir}/${params.dsname}_whatshap_split.run.log + + # Index bam files + samtools index -@ ${task.cpus} \${outdir}/${params.dsname}_split_HP1.bam + samtools index -@ ${task.cpus} \${outdir}/${params.dsname}_split_HP2.bam + samtools index -@ ${task.cpus} \${outdir}/${params.dsname}_split_untagged.bam + echo "### whatshap split DONE" + """ +} diff --git a/nextflow.config b/nextflow.config index 
7e48f88a..69d199bf 100755 --- a/nextflow.config +++ b/nextflow.config @@ -50,6 +50,7 @@ params { outdir = "results" pod5 = false input_bam = false + demux = false kit_name = null // kit_name="SQK-RBK114-24" // Dorado @@ -290,7 +291,7 @@ profiles { withName: 'Tombo|DeepMod|METEORE' { container = params.tombo_docker_name } - withName: 'CLAIR3' { + withName: 'CLAIR3|CLAIR3_dorado' { container = params.clair3_docker_name } withName: 'DEEPSIGNAL2' { @@ -299,7 +300,7 @@ profiles { withName: 'Guppy6|DORADO_CALL_EXTRACT' { container = params.guppy_stable_name } - withName: 'DORADO_CALL' { + withName: 'DORADO_CALL|DORADO_DEMUX' { container = params.dorado_docker_name } } @@ -337,7 +338,7 @@ profiles { container = params.tombo_docker_name.startsWith("/") ? params.tombo_docker_name : "docker://${params.tombo_docker_name}" } - withName: 'CLAIR3' { + withName: 'CLAIR3|CLAIR3_dorado' { // container = "docker://${params.clair3_docker_name}" container = params.clair3_docker_name.startsWith("/") ? params.clair3_docker_name : "docker://${params.clair3_docker_name}" @@ -351,7 +352,7 @@ profiles { container = params.guppy_stable_name.startsWith("/") ? params.guppy_stable_name : "docker://${params.guppy_stable_name}" } - withName: 'DORADO_CALL' { + withName: 'DORADO_CALL|DORADO_DEMUX' { - container = params.guppy_stable_name.startsWith("/") ? + container = params.dorado_docker_name.startsWith("/") ? params.dorado_docker_name : "docker://${params.dorado_docker_name}" } From ee3821724ebb86bdd8dee3b07f03689ad2bcf3ab Mon Sep 17 00:00:00 2001 From: yangliu Date: Fri, 3 Oct 2025 10:33:20 -0700 Subject: [PATCH 8/8] add kit option --- modules/DORADO.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/DORADO.nf b/modules/DORADO.nf index 86f64985..dd420b77 100644 --- a/modules/DORADO.nf +++ b/modules/DORADO.nf @@ -85,7 +85,7 @@ process DORADO_CALL { shell: cores = task.cpus * params.highProcTimes - kitOpt = params.kit_name ? "--kit-name ${params.kit_name}" : "" + kitOpt = params.demux ? 
"--kit-name ${params.kit_name}" : "" ''' date; hostname; pwd