From 9dad24b58330451572f9bf1f5df8c0c965b25973 Mon Sep 17 00:00:00 2001
From: "(major) john (major)"
Date: Mon, 8 Sep 2025 21:32:58 -0700
Subject: [PATCH] Add NeuSomatic somatic and ensemble rules

---
# Review notes (this area between `---` and the diffstat is ignored by git am / git apply).
#
# Purpose: adds a `neusomatic:` section to the local and slurm rule_config templates,
# includes rules/neusomatic.smk from the Snakefile, defines NEUSOM_CHRMS in rule_common.smk,
# and creates two rules (`neusomatic`, `neusomatic_ensemble`) plus the input helper
# `get_neusom_ensemble_callers`.
#
# NOTE(review): line breaks and indentation in this copy were reconstructed from a
#   whitespace-collapsed paste; verify context-line whitespace against the tree before
#   applying (or use `git apply --ignore-whitespace`). The slurm rule_config hunk header
#   declares 6 context lines but fewer are recoverable here - confirm against the original.
# NOTE(review): `neusomatic_ensemble` builds `--callers` from `params.caller_vcfs`, which
#   calls `get_neusom_ensemble_callers` a second time; `input.callers` already holds the
#   result of that same function.
# NOTE(review): when none of mutect2/strelka2/vardict/varscan2 is in `somatic_snv_CALLERS`
#   the helper returns [] and the shell command passes `--callers` with no value.
# NOTE(review): `NEUSOM_CHRMS`, the `*_neusom_chrms` keys and `env_yaml` are added, but
#   neither rule in neusomatic.smk reads them in this patch; `import os` is likewise not
#   used inside the new file. Presumably placeholders - confirm intent.
# NOTE(review): both rules append to the log with `>>`, so reruns accumulate output.
# NOTE(review): TODO confirm that the `bioinform/neusomatic:0.2.1` image exposes
#   `neusomatic.py call` / `neusomatic.py ensemble` with these flags and accepts CRAM input.
#
 .../local/templates/rule_config.yaml          | 11 +++
 .../slurm/templates/rule_config.yaml          | 11 +++
 workflow/Snakefile                            |  1 +
 workflow/rules/neusomatic.smk                 | 96 +++++++++++++++++++
 workflow/rules/rule_common.smk                |  1 +
 5 files changed, 120 insertions(+)
 create mode 100644 workflow/rules/neusomatic.smk

diff --git a/config/day_profiles/local/templates/rule_config.yaml b/config/day_profiles/local/templates/rule_config.yaml
index 6d9c26d..6a8dd6e 100755
--- a/config/day_profiles/local/templates/rule_config.yaml
+++ b/config/day_profiles/local/templates/rule_config.yaml
@@ -196,6 +196,17 @@ deepsomatic:
   numa: " OMP_NUM_THREADS=8 OMP_PROC_BIND=close OMP_PLACES=threads OMP_PROC_BIND=TRUE OMP_DYNAMIC=TRUE OMP_MAX_ACTIVE_LEVELS=1 OMP_SCHEDULE=dynamic OMP_WAIT_POLICY=ACTIVE "
   dvsom_conda: "../envs/vanilla_v0.1.yaml"
 
+neusomatic:
+  threads: 8
+  env_yaml: "../envs/vanilla_v0.1.yaml"
+  container: "docker://bioinform/neusomatic:0.2.1"
+  partition: "i8"
+  mem_mb: 60000
+  hg38_neusom_chrms: "21,22"
+  hg38_broad_neusom_chrms: "21,22"
+  b37_neusom_chrms: "21,22"
+  numa: ""
+
 duphold:
   threads: 7
   env_yaml: "../envs/duphold_v0.1.yaml"
diff --git a/config/day_profiles/slurm/templates/rule_config.yaml b/config/day_profiles/slurm/templates/rule_config.yaml
index 4f34c4b..87c67cb 100755
--- a/config/day_profiles/slurm/templates/rule_config.yaml
+++ b/config/day_profiles/slurm/templates/rule_config.yaml
@@ -195,6 +195,17 @@ deepsomatic:
   numa: " OMP_THREADS=64 OMP_PROC_BIND=close OMP_PLACES=threads OMP_DYNAMIC=true OMP_MAX_ACTIVE_LEVELS=1 OMP_SCHEDULE=dynamic OMP_WAIT_POLICY=ACTIVE "
   dvsom_conda: "../envs/vanilla_v0.1.yaml"
 
+neusomatic:
+  threads: 42
+  env_yaml: "../envs/vanilla_v0.1.yaml"
+  container: "docker://bioinform/neusomatic:0.2.1"
+  partition: "i192,i128,i192mem"
+  mem_mb: 85000
+  hg38_neusom_chrms: "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24"
+  hg38_broad_neusom_chrms: "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24"
+  b37_neusom_chrms: "1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24"
+  numa: ""
+
 duphold:
diff --git a/workflow/Snakefile b/workflow/Snakefile
index 49bbadb..5330fb8 100755
--- a/workflow/Snakefile
+++ b/workflow/Snakefile
@@ -265,6 +265,7 @@ include: "rules/deepvariant_1_5.smk"
 include: "rules/deepvariant_1_9.smk"
 include: "rules/deepvariant_ug.smk"
 include: "rules/deepsomatic.smk"
+include: "rules/neusomatic.smk"
 include: "rules/doppel_mrkdups.smk"
 include: "rules/duphold.smk"
 include: "rules/dysgu_sv.smk"
diff --git a/workflow/rules/neusomatic.smk b/workflow/rules/neusomatic.smk
new file mode 100644
index 0000000..70dc570
--- /dev/null
+++ b/workflow/rules/neusomatic.smk
@@ -0,0 +1,96 @@
+import os
+
+##### neusomatic
+# ---------------------------
+
+
+def get_neusom_ensemble_callers(wildcards):
+    vcfs = []
+    base = MDIR + f"{wildcards.sample}/align/{wildcards.alnr}/snv"
+    mapping = {
+        "mutect2": f"{base}/mutect2/{wildcards.sample}.{wildcards.alnr}.mutect2.vcf",
+        "strelka2": f"{base}/strelka2/{wildcards.sample}.{wildcards.alnr}.strelka2.vcf",
+        "vardict": f"{base}/vardict/{wildcards.sample}.{wildcards.alnr}.vardict.vcf",
+        "varscan2": f"{base}/varscan2/{wildcards.sample}.{wildcards.alnr}.varscan2.vcf",
+    }
+    for caller in ["mutect2", "strelka2", "vardict", "varscan2"]:
+        if caller in somatic_snv_CALLERS:
+            vcfs.append(mapping[caller])
+    return vcfs
+
+
+rule neusomatic:
+    wildcard_constraints:
+        sample=TUMORS_REGEX
+    input:
+        tumor_cram=get_somcall_tumor_cram,
+        tumor_crai=get_somcall_tumor_crai,
+        normal_cram=get_somcall_normal_cram,
+        normal_crai=get_somcall_normal_crai,
+        ref_fa=lambda wc: config["supporting_files"]["files"]["huref"]["fasta"]["name"],
+        ref_fai=lambda wc: config["supporting_files"]["files"]["huref"]["fasta"]["name"] + ".fai",
+    output:
+        vcf=MDIR + "{sample}/align/{alnr}/snv/neusomatic/{sample}.{alnr}.neusomatic.snv.vcf",
+    log:
+        MDIR + "{sample}/align/{alnr}/snv/neusomatic/log/{sample}.{alnr}.neusomatic.snv.log",
+    threads: config['neusomatic']['threads']
+    container:
+        config['neusomatic']['container']
+    resources:
+        vcpu=config['neusomatic']['threads'],
+        threads=config['neusomatic']['threads'],
+        partition=config['neusomatic']['partition'],
+        mem_mb=config['neusomatic']['mem_mb'],
+    params:
+        cluster_sample=ret_sample,
+        numa=config['neusomatic']['numa'],
+    shell:
+        r"""
+        set -euo pipefail
+        {params.numa} neusomatic.py call \
+            --output {output.vcf} \
+            --tumor {input.tumor_cram} \
+            --normal {input.normal_cram} \
+            --ref {input.ref_fa} \
+            --threads {threads} >> {log} 2>&1
+        """
+
+
+rule neusomatic_ensemble:
+    wildcard_constraints:
+        sample=TUMORS_REGEX
+    input:
+        tumor_cram=get_somcall_tumor_cram,
+        tumor_crai=get_somcall_tumor_crai,
+        normal_cram=get_somcall_normal_cram,
+        normal_crai=get_somcall_normal_crai,
+        ref_fa=lambda wc: config["supporting_files"]["files"]["huref"]["fasta"]["name"],
+        ref_fai=lambda wc: config["supporting_files"]["files"]["huref"]["fasta"]["name"] + ".fai",
+        callers=get_neusom_ensemble_callers,
+    output:
+        vcf=MDIR + "{sample}/align/{alnr}/snv/neusomatic/{sample}.{alnr}.neusomatic_ensemble.snv.vcf",
+    log:
+        MDIR + "{sample}/align/{alnr}/snv/neusomatic/log/{sample}.{alnr}.neusomatic_ensemble.snv.log",
+    threads: config['neusomatic']['threads']
+    container:
+        config['neusomatic']['container']
+    resources:
+        vcpu=config['neusomatic']['threads'],
+        threads=config['neusomatic']['threads'],
+        partition=config['neusomatic']['partition'],
+        mem_mb=config['neusomatic']['mem_mb'],
+    params:
+        cluster_sample=ret_sample,
+        numa=config['neusomatic']['numa'],
+        caller_vcfs=lambda wildcards: " ".join(get_neusom_ensemble_callers(wildcards)),
+    shell:
+        r"""
+        set -euo pipefail
+        {params.numa} neusomatic.py ensemble \
+            --output {output.vcf} \
+            --tumor {input.tumor_cram} \
+            --normal {input.normal_cram} \
+            --ref {input.ref_fa} \
+            --callers {params.caller_vcfs} \
+            --threads {threads} >> {log} 2>&1
+        """
diff --git a/workflow/rules/rule_common.smk b/workflow/rules/rule_common.smk
index d6b7c85..26ed29f 100755
--- a/workflow/rules/rule_common.smk
+++ b/workflow/rules/rule_common.smk
@@ -90,6 +90,7 @@ OCTO_CHRMS = config["octopus"][f"{config['genome_build']}_octo_chrms"].split(",")
 CLAIR3_CHRMS = config["clair3"][f"{config['genome_build']}_clair3_chrms"].split(",")
 LOFREQ_CHRMS = config["lofreq2"][f"{config['genome_build']}_lofreq_chrms"].split(",")
 DVSOM_CHRMS = config["deepsomatic"][f"{config['genome_build']}_dvsom_chrms"].split(",")
+NEUSOM_CHRMS = config["neusomatic"][f"{config['genome_build']}_neusom_chrms"].split(",")
 SENTTN_CHRMS = config["senttn"][f"{config['genome_build']}_senttn_chrms"].split(",")
 VARN_CHRMS = (