From a95316f754d6a9254b09c1d72a31535ae2790f17 Mon Sep 17 00:00:00 2001 From: fellen31 Date: Tue, 4 Mar 2025 16:40:25 +0100 Subject: [PATCH 1/3] Update hifiasm to sort inputs --- CHANGELOG.md | 2 + modules.json | 2 +- modules/nf-core/hifiasm/environment.yml | 2 + modules/nf-core/hifiasm/hifiasm.diff | 20 ------ modules/nf-core/hifiasm/main.nf | 16 +++-- modules/nf-core/hifiasm/meta.yml | 8 ++- modules/nf-core/hifiasm/tests/main.nf.test | 66 +++++++++++++++++-- .../nf-core/hifiasm/tests/main.nf.test.snap | 54 ++++++++++++++- .../local/align_assemblies/tests/main.nf.test | 3 +- subworkflows/local/genome_assembly.nf | 3 +- .../samplesheet_multisample_bam.nf.test.snap | 8 +-- ...mplesheet_multisample_ont_bam.nf.test.snap | 10 +-- 12 files changed, 147 insertions(+), 47 deletions(-) delete mode 100644 modules/nf-core/hifiasm/hifiasm.diff diff --git a/CHANGELOG.md b/CHANGELOG.md index f734d857..b268719d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +- [#](https://github.com/genomic-medicine-sweden/nallo/pull/) - Fixed unstable assembly outputs when there's multiple input files per sample + ### Parameters | Old parameter | New parameter | diff --git a/modules.json b/modules.json index ea56429c..b3380d19 100644 --- a/modules.json +++ b/modules.json @@ -151,7 +151,7 @@ }, "hifiasm": { "branch": "master", - "git_sha": "6bb8be8a75d05ee6d31205b2b97d48c7bcef60ab", + "git_sha": "fcab846b30baf8391257dd302feece91714769f2", "installed_by": ["modules"], "patch": "modules/nf-core/hifiasm/hifiasm.diff" }, diff --git a/modules/nf-core/hifiasm/environment.yml b/modules/nf-core/hifiasm/environment.yml index 6aea679a..b7ba1981 100644 --- a/modules/nf-core/hifiasm/environment.yml +++ b/modules/nf-core/hifiasm/environment.yml @@ -1,3 +1,5 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json channels: - 
conda-forge - bioconda diff --git a/modules/nf-core/hifiasm/hifiasm.diff b/modules/nf-core/hifiasm/hifiasm.diff deleted file mode 100644 index 0f3abe03..00000000 --- a/modules/nf-core/hifiasm/hifiasm.diff +++ /dev/null @@ -1,20 +0,0 @@ -Changes in component 'nf-core/hifiasm' -'modules/nf-core/hifiasm/meta.yml' is unchanged -Changes in 'hifiasm/main.nf': ---- modules/nf-core/hifiasm/main.nf -+++ modules/nf-core/hifiasm/main.nf -@@ -8,7 +8,7 @@ - 'biocontainers/hifiasm:0.24.0--h5ca1c30_0' }" - - input: -- tuple val(meta) , path(reads) -+ tuple val(meta) , path(reads, stageAs: "?/*") - tuple val(meta1), path(paternal_kmer_dump), path(maternal_kmer_dump) - tuple val(meta2), path(hic_read1) , path(hic_read2) - - -'modules/nf-core/hifiasm/environment.yml' is unchanged -'modules/nf-core/hifiasm/tests/main.nf.test.snap' is unchanged -'modules/nf-core/hifiasm/tests/nextflow.config' is unchanged -'modules/nf-core/hifiasm/tests/main.nf.test' is unchanged -************************************************************ diff --git a/modules/nf-core/hifiasm/main.nf b/modules/nf-core/hifiasm/main.nf index 14513750..b1097862 100644 --- a/modules/nf-core/hifiasm/main.nf +++ b/modules/nf-core/hifiasm/main.nf @@ -8,7 +8,7 @@ process HIFIASM { 'biocontainers/hifiasm:0.24.0--h5ca1c30_0' }" input: - tuple val(meta) , path(reads, stageAs: "?/*") + tuple val(meta) , path(long_reads) , path(ul_reads) tuple val(meta1), path(paternal_kmer_dump), path(maternal_kmer_dump) tuple val(meta2), path(hic_read1) , path(hic_read2) @@ -32,6 +32,11 @@ process HIFIASM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + + def long_reads_sorted = long_reads instanceof List ? long_reads.sort{ it.name } : long_reads + def ul_reads_sorted = ul_reads instanceof List ? ul_reads.sort{ it.name } : ul_reads + def ultralong = ul_reads ? 
"--ul ${ul_reads_sorted}" : "" + if ((paternal_kmer_dump) && (maternal_kmer_dump) && (hic_read1) && (hic_read2)) { error "Hifiasm Trio-binning and Hi-C integrated should not be used at the same time" } else if ((paternal_kmer_dump) && !(maternal_kmer_dump)) { @@ -46,7 +51,8 @@ process HIFIASM { -t $task.cpus \\ -1 $paternal_kmer_dump \\ -2 $maternal_kmer_dump \\ - $reads \\ + $ultralong \\ + $long_reads_sorted \\ 2> >( tee ${prefix}.stderr.log >&2 ) @@ -67,7 +73,8 @@ process HIFIASM { -t $task.cpus \\ --h1 $hic_read1 \\ --h2 $hic_read2 \\ - $reads \\ + $ultralong \\ + $long_reads_sorted \\ 2> >( tee ${prefix}.stderr.log >&2 ) @@ -82,7 +89,8 @@ process HIFIASM { $args \\ -o ${prefix}.asm \\ -t $task.cpus \\ - $reads \\ + $ultralong \\ + $long_reads_sorted \\ 2> >( tee ${prefix}.stderr.log >&2 ) cat <<-END_VERSIONS > versions.yml diff --git a/modules/nf-core/hifiasm/meta.yml b/modules/nf-core/hifiasm/meta.yml index 343ef182..b255571f 100644 --- a/modules/nf-core/hifiasm/meta.yml +++ b/modules/nf-core/hifiasm/meta.yml @@ -22,10 +22,12 @@ input: description: | Groovy Map containing sample information e.g. [ id:'test', single_end:false ] - - reads: + - long_reads: type: file - description: FASTQ file with PacBio HiFi reads - pattern: "*.{fastq}" + description: PacBio HiFi reads or ONT reads (ONT requires ext.args '--ont'). + - ul_reads: + type: file + description: ONT long reads to use with --ul. 
- - meta1: type: map description: | diff --git a/modules/nf-core/hifiasm/tests/main.nf.test b/modules/nf-core/hifiasm/tests/main.nf.test index 5258890a..b426d6f0 100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test +++ b/modules/nf-core/hifiasm/tests/main.nf.test @@ -15,7 +15,8 @@ nextflow_process { """ input[0] = [ [ id : 'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ] input[1] = [ [], @@ -49,13 +50,59 @@ nextflow_process { } } + test("homo_sapiens pacbio hifi [fastq x2, [,], [,] ]") { + + when { + process { + """ + input[0] = Channel.of([ + [ id : 'test'], + [ + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/alz.ccs.fastq', checkIfExists: true), + ], + [] + ]) + input[1] = [ + [], + [], + [] + ] + input[2] = [ + [], + [], + [] + ] + """ + } + } + then { + assertAll( + { assert process.success }, + { assert file(process.out.corrected_reads.get(0).get(1)).exists() }, + { assert file(process.out.source_overlaps.get(0).get(1)).exists() }, + { assert file(process.out.reverse_overlaps.get(0).get(1)).exists() }, + { assert file(process.out.log.get(0).get(1)).exists() }, + { assert snapshot( + process.out.raw_unitigs, + process.out.processed_contigs, + process.out.processed_unitigs, + process.out.paternal_contigs, + process.out.maternal_contigs, + process.out.versions + ).match() } + ) + } + } + test("homo_sapiens pacbio hifi [fastq, [yak, yak], [,] ]") { when { process { """ input[0] = [ [ id : 'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 
'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ] input[1] = [ [ id : 'test' ], @@ -94,7 +141,8 @@ nextflow_process { """ input[0] = [ [ id : 'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ] input[1] = [ [ id : 'test' ], @@ -132,7 +180,8 @@ nextflow_process { """ input[0] = [ [ id : 'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ] input[1] = [ [ id : 'test' ], @@ -158,7 +207,8 @@ nextflow_process { """ input[0] = [ [ id : 'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ] input[1] = [ [ id : 'test'], @@ -180,7 +230,8 @@ nextflow_process { """ input[0] = [ [ id : 'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ] input[1] = [] input[2] = [ @@ -204,7 +255,8 @@ nextflow_process { """ input[0] = [ [ id : 'test'], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ] input[1] = [ [], diff --git a/modules/nf-core/hifiasm/tests/main.nf.test.snap b/modules/nf-core/hifiasm/tests/main.nf.test.snap index e9a0de59..508e4c27 
100644 --- a/modules/nf-core/hifiasm/tests/main.nf.test.snap +++ b/modules/nf-core/hifiasm/tests/main.nf.test.snap @@ -51,6 +51,58 @@ }, "timestamp": "2024-11-28T10:51:30.175326435" }, + "homo_sapiens pacbio hifi [fastq x2, [,], [,] ]": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.asm.bp.r_utg.gfa:md5,e6e38ac76f6b73142c3fe403f71d27b0" + ] + ], + [ + [ + { + "id": "test" + }, + "test.asm.bp.p_ctg.gfa:md5,43890a1832d8f26de263e57dc5e3b8de" + ] + ], + [ + [ + { + "id": "test" + }, + "test.asm.bp.p_utg.gfa:md5,e6e38ac76f6b73142c3fe403f71d27b0" + ] + ], + [ + [ + { + "id": "test" + }, + "test.asm.bp.hap1.p_ctg.gfa:md5,7d7ea2bed472de263f6ec3521959b0d9" + ] + ], + [ + [ + { + "id": "test" + }, + "test.asm.bp.hap2.p_ctg.gfa:md5,ce096a66c9bba039c6a22ba9e9409d01" + ] + ], + [ + "versions.yml:md5,a2ea36e18c39850b1680302d9f6c950f" + ] + ], + "meta": { + "nf-test": "0.9.2", + "nextflow": "24.10.3" + }, + "timestamp": "2025-03-04T10:53:37.361240273" + }, "homo_sapiens pacbio hifi [fastq, [,], [fastq, fastq] ]": { "content": [ [ @@ -316,4 +368,4 @@ }, "timestamp": "2024-11-28T10:51:45.301359171" } -} +} \ No newline at end of file diff --git a/subworkflows/local/align_assemblies/tests/main.nf.test b/subworkflows/local/align_assemblies/tests/main.nf.test index c0f80d12..9388ed32 100644 --- a/subworkflows/local/align_assemblies/tests/main.nf.test +++ b/subworkflows/local/align_assemblies/tests/main.nf.test @@ -12,7 +12,8 @@ nextflow_workflow { """ input[0] = Channel.of([ [ id:'hg38' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true) + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/pacbio/fastq/test_hifi.fastq.gz', checkIfExists: true), + [] ]) input[1] = [[],[],[]] input[2] = [[],[],[]] diff --git a/subworkflows/local/genome_assembly.nf b/subworkflows/local/genome_assembly.nf index 92894398..6ec0ac45 100644 --- a/subworkflows/local/genome_assembly.nf +++ 
b/subworkflows/local/genome_assembly.nf @@ -18,6 +18,7 @@ workflow ASSEMBLY { ch_reads .groupTuple() + .map { meta, reads -> [ meta, reads, [] ] } .set { hifiasm_in } HIFIASM ( hifiasm_in, ch_hifiasm_empty, ch_hifiasm_empty ) @@ -97,7 +98,7 @@ workflow ASSEMBLY { hifiasm_trio_in .multiMap { meta, reads, paternal_yak, maternal_yak -> - reads : [meta, reads ] + reads : [meta, reads, [] ] yak : [meta, paternal_yak, maternal_yak] } .set { ch_hifiasm_in } diff --git a/tests/samplesheet_multisample_bam.nf.test.snap b/tests/samplesheet_multisample_bam.nf.test.snap index ee968490..1db3c140 100644 --- a/tests/samplesheet_multisample_bam.nf.test.snap +++ b/tests/samplesheet_multisample_bam.nf.test.snap @@ -596,8 +596,8 @@ [ "HG002_Revio_A_haplotype_1.assembly_summary:md5,b949217515292b5a7735af4ad4bd5b11", "HG002_Revio_A_haplotype_2.assembly_summary:md5,c3e50f54e9e2871771d1735c54e57c5e", - "HG002_Revio_B_haplotype_1.assembly_summary:md5,7fd907d993bf1140eabc2f3c2116df96", - "HG002_Revio_B_haplotype_2.assembly_summary:md5,241c10b531eae6c6bd56d71be2b0806c", + "HG002_Revio_B_haplotype_1.assembly_summary:md5,54140e0f06c64450e9e77cfcb2421a10", + "HG002_Revio_B_haplotype_2.assembly_summary:md5,3a21cefa394ba4851b2cc33e21670458", "HG002_Revio_A_modkit_pileup_1.bed.gz:md5,7af3b6246d0c007aec686714b96a0f7f", "HG002_Revio_A_modkit_pileup_1.bed.gz.tbi:md5,b7083ebf6ba176ed5a472ad653c5be27", "HG002_Revio_A_modkit_pileup_2.bed.gz:md5,98e42ec905d8af26046862618ca0823e", @@ -686,7 +686,7 @@ ], [ "HG002_Revio_B_aligned_assembly.bam", - "6c58e6d74d98843140656ce57b462e5b" + "c42cf558e7999076a6fca639b37615d8" ], [ "HG002_Revio_A.paraphase.bam", @@ -806,6 +806,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.3" }, - "timestamp": "2025-03-03T16:55:30.471294594" + "timestamp": "2025-03-04T16:23:36.302480749" } } \ No newline at end of file diff --git a/tests/samplesheet_multisample_ont_bam.nf.test.snap b/tests/samplesheet_multisample_ont_bam.nf.test.snap index 5d7a5b60..29a9c226 100644 --- 
a/tests/samplesheet_multisample_ont_bam.nf.test.snap +++ b/tests/samplesheet_multisample_ont_bam.nf.test.snap @@ -558,8 +558,8 @@ [ "HG002_ONT_A_haplotype_1.assembly_summary:md5,362b1d3234e3dcdfe07724b4c4e8fb32", "HG002_ONT_A_haplotype_2.assembly_summary:md5,68654acb33785035a2e701a87c169bb2", - "HG002_ONT_B_haplotype_1.assembly_summary:md5,cb2280cb3895ebbe29093ed9603e81d2", - "HG002_ONT_B_haplotype_2.assembly_summary:md5,fce697aa7c3c739eff6023b73fd2b382", + "HG002_ONT_B_haplotype_1.assembly_summary:md5,563bf25ae45f1f2b1d654dba508565cf", + "HG002_ONT_B_haplotype_2.assembly_summary:md5,28835c2b317c19c53d20eaa6cf7ae6a6", "HG002_ONT_A_modkit_pileup_1.bed.gz:md5,a833ab3d48e7f94372edbefe8ca99e59", "HG002_ONT_A_modkit_pileup_1.bed.gz.tbi:md5,a0c5a1faaf0373a58f97302b977e5595", "HG002_ONT_A_modkit_pileup_2.bed.gz:md5,7c645581ccb6a55046d11f9a6f4b0d4a", @@ -641,8 +641,8 @@ "9e38da39eb3ca1c6978ba9618b9ce9a0" ], [ - "HG002_ONT_B_aligned_assembly.bam", - "1eec67256dd70aaa8d6272b6bea5fb42" + "HG002_ONT_B_aligned_assembly.bam:md5,384821734adc9319e05b2dd4f759879f", + "6fbb9538b134415a33ae501fbee7a375" ], [ "HG002_ONT_A.paraphase.bam", @@ -743,6 +743,6 @@ "nf-test": "0.9.0", "nextflow": "24.10.3" }, - "timestamp": "2025-03-03T16:57:29.462467053" + "timestamp": "2025-03-04T16:25:34.44185882" } } \ No newline at end of file From 5a463a0b1536eb7338e2890c7fe8dc820f571197 Mon Sep 17 00:00:00 2001 From: fellen31 Date: Tue, 4 Mar 2025 16:44:20 +0100 Subject: [PATCH 2/3] CHANGELOG --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b268719d..f7dd9c80 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,7 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` -- [#](https://github.com/genomic-medicine-sweden/nallo/pull/) - Fixed unstable assembly outputs when there's multiple input files per sample +- [#595](https://github.com/genomic-medicine-sweden/nallo/pull/595) - Fixed unstable 
assembly outputs when there are multiple input files per sample ### Parameters From 6b13d2785dae2c319ca285a1c78d291210a4b636 Mon Sep 17 00:00:00 2001 From: fellen31 Date: Wed, 5 Mar 2025 12:48:51 +0100 Subject: [PATCH 3/3] fix tests --- tests/samplesheet_multisample_ont_bam.nf.test.snap | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/samplesheet_multisample_ont_bam.nf.test.snap b/tests/samplesheet_multisample_ont_bam.nf.test.snap index 29a9c226..84d480b3 100644 --- a/tests/samplesheet_multisample_ont_bam.nf.test.snap +++ b/tests/samplesheet_multisample_ont_bam.nf.test.snap @@ -641,7 +641,7 @@ "9e38da39eb3ca1c6978ba9618b9ce9a0" ], [ - "HG002_ONT_B_aligned_assembly.bam:md5,384821734adc9319e05b2dd4f759879f", + "HG002_ONT_B_aligned_assembly.bam", "6fbb9538b134415a33ae501fbee7a375" ], [ @@ -736,7 +736,7 @@ ] ], [ - + ] ], "meta": { @@ -745,4 +745,4 @@ }, "timestamp": "2025-03-04T16:25:34.44185882" } -} \ No newline at end of file +}