From 6dd509e61517869c028ec280f6dcb49e2a866c5d Mon Sep 17 00:00:00 2001 From: "Lataretu, Marie" Date: Fri, 15 Mar 2024 10:26:47 +0100 Subject: [PATCH] Add parameters seq_threshold and n_threshold - set the sequence identity (-x) and N-content (-n) thresholds for president --- CoVpipe2.nf | 4 ++-- modules/president.nf | 4 +++- nextflow.config | 4 ++++ workflows/genome_quality_wf.nf | 4 +++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/CoVpipe2.nf b/CoVpipe2.nf index e7ab5ac..6db05f4 100644 --- a/CoVpipe2.nf +++ b/CoVpipe2.nf @@ -6,7 +6,7 @@ nextflow.enable.dsl=2 if (params.help) { exit 0, helpMSG() } // parameter sanity check -Set valid_params = ['cores', 'max_cores', 'memory', 'help', 'profile', 'workdir', 'fastq', 'list', 'mode', 'run_id', 'reference', 'ref_genome', 'ref_annotation', 'adapter', 'fastp_additional_parameters', 'kraken', 'kraken_db_custom', 'taxid', 'read_linage', 'lcs_ucsc_version', 'lcs_ucsc_predefined', 'lcs_ucsc_update', 'lcs_ucsc_downsampling', 'lcs_variant_groups', 'lcs_cutoff', 'isize_filter', 'primer_bed', 'primer_bedpe', 'primer_version', 'bamclipper_additional_parameters', 'vcount', 'frac', 'cov', 'vois', 'var_mqm', 'var_sap', 'var_qual', 'cns_min_cov', 'cns_gt_adjust', 'cns_indel_filter', 'update', 'pangolin_docker_default', 'nextclade_docker_default', 'pangolin_conda_default', 'nextclade_conda_default', 'nextclade_dataset_name', 'nextclade_dataset_tag', 'output', 'reference_dir', 'read_dir', 'mapping_dir', 'variant_calling_dir', 'consensus_dir', 'linage_dir', 'report_dir', 'rki_dir', 'runinfo_dir', 'singularity_cache_dir', 'conda_cache_dir', 'databases', 'publish_dir_mode', 'cloudProcess', 'cloud-process'] +Set valid_params = ['cores', 'max_cores', 'memory', 'help', 'profile', 'workdir', 'fastq', 'list', 'mode', 'run_id', 'reference', 'ref_genome', 'ref_annotation', 'adapter', 'fastp_additional_parameters', 'kraken', 'kraken_db_custom', 'taxid', 'read_linage', 'lcs_ucsc_version', 'lcs_ucsc_predefined', 'lcs_ucsc_update', 
'lcs_ucsc_downsampling', 'lcs_variant_groups', 'lcs_cutoff', 'isize_filter', 'primer_bed', 'primer_bedpe', 'primer_version', 'bamclipper_additional_parameters', 'vcount', 'frac', 'cov', 'vois', 'var_mqm', 'var_sap', 'var_qual', 'cns_min_cov', 'cns_gt_adjust', 'cns_indel_filter', 'n_threshold', 'seq_threshold', 'update', 'pangolin_docker_default', 'nextclade_docker_default', 'pangolin_conda_default', 'nextclade_conda_default', 'nextclade_dataset_name', 'nextclade_dataset_tag', 'output', 'reference_dir', 'read_dir', 'mapping_dir', 'variant_calling_dir', 'consensus_dir', 'linage_dir', 'report_dir', 'rki_dir', 'runinfo_dir', 'singularity_cache_dir', 'conda_cache_dir', 'databases', 'publish_dir_mode', 'cloudProcess', 'cloud-process'] def parameter_diff = params.keySet() - valid_params if (parameter_diff.size() != 0){ exit 1, "ERROR: Parameter(s) $parameter_diff is/are not valid in the pipeline!\n" @@ -323,7 +323,7 @@ workflow { // 11: linage assignment, genome quality assign_linages(generate_consensus.out.consensus_ambiguous) - genome_quality(generate_consensus.out.consensus_ambiguous, reference_ch) + genome_quality(generate_consensus.out.consensus_ambiguous, reference_ch, params.seq_threshold, params.n_threshold) // 12: report summary_report(generate_consensus.out.consensus_ambiguous, read_qc.out.fastp_json, kraken_reports.ifEmpty([]), mapping.out.mapping_stats, mapping.out.fragment_size, mapping.out.coverage, genome_quality.out.valid.map{it -> it[1]}, assign_linages.out.report, annotate_variant.out.nextclade_results, annotate_variant.out.nextclade_version, annotate_variant.out.nextclade_dataset_info, annotate_variant.out.sc2rf_result, vois.ifEmpty([]) ) diff --git a/modules/president.nf b/modules/president.nf index 54eb71d..9ab16fa 100644 --- a/modules/president.nf +++ b/modules/president.nf @@ -6,6 +6,8 @@ process president { input: tuple val(name), path(fasta) path(reference_fasta) + val(seq_threshold) + val(n_threshold) output: tuple val(name), 
path("${name}_report.tsv"), path("${name}_valid.fasta"), emit: valid @@ -13,7 +15,7 @@ process president { script: """ - president -r ${reference_fasta} -t $task.cpus -q ${fasta} -x 0.90 -n 0.05 -p . -f ${name}_ + president -r ${reference_fasta} -t $task.cpus -q ${fasta} -x ${seq_threshold} -n ${n_threshold} -p . -f ${name}_ """ stub: """ diff --git a/nextflow.config b/nextflow.config index 0afc2a9..47004fc 100644 --- a/nextflow.config +++ b/nextflow.config @@ -70,6 +70,10 @@ params { cns_min_cov = 20 cns_gt_adjust = 0.9 cns_indel_filter = 0.6 + + // consensus qc + seq_threshold = 0.90 + n_threshold = 0.05 // update settings and default container update = false diff --git a/workflows/genome_quality_wf.nf b/workflows/genome_quality_wf.nf index 2084607..78b2e7d 100644 --- a/workflows/genome_quality_wf.nf +++ b/workflows/genome_quality_wf.nf @@ -5,8 +5,10 @@ workflow genome_quality { take: fasta reference + seq_threshold + n_threshold main: - president(fasta, reference) + president(fasta, reference, seq_threshold, n_threshold) emit: valid = president.out.valid invalid = president.out.invalid