From 5e7d8c04636bde864126d550123179e9446ebbd9 Mon Sep 17 00:00:00 2001 From: pdimens Date: Mon, 25 Nov 2024 14:30:28 -0500 Subject: [PATCH] better output folder structure --- harpy/snakefiles/simulate_snpindel.smk | 38 +++++++-------- harpy/snakefiles/simulate_variants.smk | 64 +++++++++++++------------- 2 files changed, 51 insertions(+), 51 deletions(-) diff --git a/harpy/snakefiles/simulate_snpindel.smk b/harpy/snakefiles/simulate_snpindel.smk index 965474b36..8cea5cbd2 100644 --- a/harpy/snakefiles/simulate_snpindel.smk +++ b/harpy/snakefiles/simulate_snpindel.smk @@ -108,15 +108,33 @@ rule simulate_haploid: shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.parameters} > {log}" +rule rename_haploid: + input: + fasta = f"{outdir}/{outprefix}.simseq.genome.fa", + snpvcf = f"{outdir}/{outprefix}.refseq2simseq.SNP.vcf" if snp else [], + indelvcf = f"{outdir}/{outprefix}.refseq2simseq.INDEL.vcf" if indel else [], + mapfile = f"{outdir}/{outprefix}.refseq2simseq.map.txt" + output: + fasta = f"{outdir}/{outprefix}.fasta", + snpvcf = f"{outdir}/{outprefix}.snp.vcf" if snp else [], + indelvcf = f"{outdir}/{outprefix}.indel.vcf" if indel else [], + mapfile = f"{outdir}/{outprefix}.map" + run: + for i,j in zip(input, output): + if i: + os.rename(i,j) + rule diploid_snps: input: - f"{outdir}/{outprefix}.refseq2simseq.SNP.vcf" + f"{outdir}/{outprefix}.snp.vcf" output: f"{outdir}/haplotype_1/{outprefix}.hap1.snp.vcf", f"{outdir}/haplotype_2/{outprefix}.hap2.snp.vcf" params: het = heterozygosity run: + os.makedirs(f"{outdir}/haplotype_1", exist_ok = True) + os.makedirs(f"{outdir}/haplotype_2", exist_ok = True) rng = random.Random(randomseed) if randomseed else random.Random() with open(input[0], "r") as in_vcf, open(output[0], "w") as hap1, open(output[1], "w") as hap2: for line in in_vcf: @@ -131,7 +149,7 @@ rule diploid_snps: use rule diploid_snps as diploid_indels with: input: - f"{outdir}/{outprefix}.refseq2simseq.INDEL.vcf" + f"{outdir}/{outprefix}.indel.vcf" output: f"{outdir}/haplotype_1/{outprefix}.hap1.indel.vcf", f"{outdir}/haplotype_2/{outprefix}.hap2.indel.vcf" @@ -157,22 +175,6 @@ rule simulate_diploid: shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.snp} {params.indel} > {log}" -rule rename_haploid: - input: - fasta = f"{outdir}/{outprefix}.simseq.genome.fa", - snpvcf = f"{outdir}/{outprefix}.refseq2simseq.SNP.vcf" if snp else [], - indelvcf = f"{outdir}/{outprefix}.refseq2simseq.INDEL.vcf" if indel else [], - mapfile = f"{outdir}/{outprefix}.refseq2simseq.map.txt" - output: - fasta = f"{outdir}/{outprefix}.fasta", - snpvcf = f"{outdir}/{outprefix}.snp.vcf" if snp else [], - indelvcf = f"{outdir}/{outprefix}.indel.vcf" if indel else [], - mapfile = f"{outdir}/{outprefix}.map" - run: - for i,j in zip(input, output): - if i: - os.rename(i,j) - rule rename_diploid: input: fasta= f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.simseq.genome.fa", diff --git a/harpy/snakefiles/simulate_variants.smk b/harpy/snakefiles/simulate_variants.smk index 92482b0a8..137e44030 100644 --- a/harpy/snakefiles/simulate_variants.smk +++ b/harpy/snakefiles/simulate_variants.smk @@ -70,15 +70,30 @@ rule simulate_haploid: shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.parameters} > {log}" +rule rename_haploid: + input: + fasta = f"{outdir}/{outprefix}.simseq.genome.fa", + vcf = f"{outdir}/{outprefix}.refseq2simseq.{variant}.vcf", + mapfile = f"{outdir}/{outprefix}.refseq2simseq.map.txt" + output: + fasta = f"{outdir}/{outprefix}.fasta", + vcf = f"{outdir}/{outprefix}.{variant}.vcf", + mapfile = f"{outdir}/{outprefix}.{variant}.map" + run: + for i,j in zip(input, output): + os.rename(i,j) + rule diploid_variants: input: - f"{outdir}/{outprefix}.refseq2simseq.{variant}.vcf" + f"{outdir}/{outprefix}.{variant}.vcf" output: - f"{outdir}/diploid/{outprefix}.hap1.{variant}.vcf", - f"{outdir}/diploid/{outprefix}.hap2.{variant}.vcf" + f"{outdir}/haplotype_1/{outprefix}.hap1.{variant}.vcf", + f"{outdir}/haplotype_2/{outprefix}.hap2.{variant}.vcf" params: het = heterozygosity run: + os.makedirs(f"{outdir}/haplotype_1", exist_ok = True) + os.makedirs(f"{outdir}/haplotype_2", exist_ok = True) rng = random.Random(randomseed) if randomseed else random.Random() with open(input[0], "r") as in_vcf, open(output[0], "w") as hap1, open(output[1], "w") as hap2: for line in in_vcf: @@ -93,58 +108,41 @@ rule diploid_variants: rule simulate_diploid: input: - hap = f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.{variant}.vcf", + hap = f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.{variant}.vcf", geno = genome output: - f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.simseq.genome.fa", - f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.refseq2simseq.map.txt", - temp(f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.refseq2simseq.{variant}.vcf") + f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.simseq.genome.fa", + f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.refseq2simseq.map.txt", + temp(f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.refseq2simseq.{variant}.vcf") log: f"{outdir}/logs/{outprefix}.hap{{haplotype}}.log" params: - prefix = f"{outdir}/diploid/{outprefix}.hap{{haplotype}}", + prefix = f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}", vcf_arg = f"-{variant}_vcf" conda: f"{envdir}/simulations.yaml" shell: "simuG -refseq {input.geno} -prefix {params.prefix} {params.vcf_arg} {input.hap} > {log}" -rule rename_haploid: +rule rename_diploid: input: - fasta = f"{outdir}/{outprefix}.simseq.genome.fa", - vcf = f"{outdir}/{outprefix}.refseq2simseq.{variant}.vcf", - mapfile = f"{outdir}/{outprefix}.refseq2simseq.map.txt" + fasta = f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.simseq.genome.fa", + mapfile = f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.refseq2simseq.map.txt" output: - fasta = f"{outdir}/{outprefix}.fasta", - vcf = f"{outdir}/{outprefix}.{variant}.vcf", - mapfile = f"{outdir}/{outprefix}.{variant}.map" + fasta = f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.fasta", + mapfile = f"{outdir}/haplotype_{{haplotype}}/{outprefix}.hap{{haplotype}}.{variant}.map" run: for i,j in zip(input, output): os.rename(i,j) -rule rename_diploid: - input: - fasta = f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.simseq.genome.fa", - mapfile = f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.refseq2simseq.map.txt" - output: - fasta = f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.fasta", - mapfile = f"{outdir}/diploid/{outprefix}.hap{{haplotype}}.{variant}.map" - container: - None - shell: - """ - mv {input.fasta} {output.fasta} - mv {input.mapfile} {output.mapfile} - """ - rule workflow_summary: default_target: True input: f"{outdir}/{outprefix}.fasta", f"{outdir}/{outprefix}.{variant}.vcf", - collect(f"{outdir}/diploid/{outprefix}.hap" + "{n}.fasta", n = [1,2]) if heterozygosity > 0 and not only_vcf else [], - collect(f"{outdir}/diploid/{outprefix}.hap" + "{n}" + f".{variant}.vcf", n = [1,2]) if heterozygosity > 0 else [], - collect(f"{outdir}/diploid/{outprefix}.hap" + "{n}" + f".{variant}.map", n = [1,2]) if heterozygosity > 0 else [] + collect(f"{outdir}/haplotype_" + "{n}" + f"/{outprefix}.hap" + "{n}.fasta", n = [1,2]) if heterozygosity > 0 and not only_vcf else [], + collect(f"{outdir}/haplotype_" + "{n}" + f"/{outprefix}.hap" + "{n}" + f".{variant}.vcf", n = [1,2]) if heterozygosity > 0 else [], + collect(f"{outdir}/haplotype_" + "{n}" + f"/{outprefix}.hap" + "{n}" + f".{variant}.map", n = [1,2]) if heterozygosity > 0 else [] params: prefix = f"{outdir}/{outprefix}", parameters = variant_params,