Skip to content

Commit

Permalink
rename assembly 1
Browse files Browse the repository at this point in the history
  • Loading branch information
SilasK committed Aug 28, 2023
1 parent 98ae5d9 commit e7f1449
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 12 deletions.
20 changes: 10 additions & 10 deletions workflow/rules/assemble.smk
Original file line number Diff line number Diff line change
Expand Up @@ -473,22 +473,22 @@ rule rename_contigs:
input:
"{sample}/assembly/{sample}_raw_contigs.fasta",
output:
"{sample}/assembly/{sample}_prefilter_contigs.fasta",
conda:
"%s/required_packages.yaml" % CONDAENV
fasta="{sample}/assembly/{sample}_prefilter_contigs.fasta",
mapping_table = "{sample}/assembly/old2new_contig_names.tsv"
threads: config.get("simplejob_threads", 1)
resources:
mem=config["simplejob_mem"],
time=config["runtime"]["simplejob"],
time=config["runtime"]["default"],
log:
"{sample}/logs/assembly/post_process/rename_and_filter_size.log",
params:
minlength=config["minimum_contig_length"],
shell:
"rename.sh "
" in={input} out={output} ow=t "
" prefix={wildcards.sample} "
" minscaf={params.minlength} &> {log} "
conda:
"../envs/fasta.yaml"
script:
"../scripts/rename_assembly.py"




if config["filter_contigs"]:
Expand Down Expand Up @@ -581,7 +581,7 @@ else: # no filter

rule do_not_filter_contigs:
input:
rules.rename_contigs.output,
"{sample}/assembly/{sample}_prefilter_contigs.fasta",
output:
"{sample}/assembly/{sample}_final_contigs.fasta",
threads: 1
Expand Down
2 changes: 1 addition & 1 deletion workflow/scripts/parse_semibin.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,4 @@ def handle_exception(exc_type, exc_value, exc_traceback):
)

with open(snakemake.output[0], "w") as outf:
outf.write("{sample}_0\t{sample}_SemiBin_1\n".format(**snakemake.wildcards))
outf.write("{sample}_1\t{sample}_SemiBin_1\n".format(**snakemake.wildcards))
2 changes: 1 addition & 1 deletion workflow/scripts/parse_vamb.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,4 @@ def handle_exception(exc_type, exc_value, exc_traceback):
for sample in samples_without_bins:
sample_output_path = cluster_output_path.format(sample=sample)
with open(sample_output_path, "w") as fout:
fout.write(f"{sample}_0\t{sample}_vamb_1\n")
fout.write(f"{sample}_1\t{sample}_vamb_1\n")
55 changes: 55 additions & 0 deletions workflow/scripts/rename_assembly.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#! /usr/bin/env python


import sys, os
import logging, traceback

# Configure the root logger once for this script: records go to the first
# entry of `snakemake.log`, messages at INFO level and above are kept, and
# every line is prefixed with a "YYYY-MM-DD HH:MM:SS" timestamp.
# NOTE(review): `snakemake` is not defined in this file; presumably it is the
# object injected by Snakemake's `script:` directive -- confirm.
logging.basicConfig(
filename=snakemake.log[0],
level=logging.INFO,
format="%(asctime)s %(message)s",
datefmt="%Y-%m-%d %H:%M:%S",
)


def handle_exception(exc_type, exc_value, exc_traceback):
    """Log an uncaught exception, including its traceback, via ``logging``.

    Designed to be installed as ``sys.excepthook``. A ``KeyboardInterrupt``
    is not logged; it is passed to the interpreter's original hook instead.

    Args:
        exc_type: Class of the exception being handled.
        exc_value: The exception instance.
        exc_traceback: Traceback object associated with the exception.
    """
    if not issubclass(exc_type, KeyboardInterrupt):
        formatted = traceback.format_exception(exc_type, exc_value, exc_traceback)
        logging.error("Uncaught exception: " + "".join(formatted))
        return

    # Let Ctrl-C behave as usual instead of ending up in the log file.
    sys.__excepthook__(exc_type, exc_value, exc_traceback)


# Route every uncaught exception through the logging-based handler above.
sys.excepthook = handle_exception


from Bio import SeqIO

# Rename every contig of the input FASTA to "<sample>_<n>" (n counts the kept
# contigs, starting at 1), drop contigs shorter than `params.minlength`, and
# write one "<new_name>\t<old_name>" line per kept contig to the mapping table.
min_length = snakemake.params.minlength
sample = snakemake.wildcards.sample

n_kept = 0
n_dropped = 0

with open(snakemake.output.fasta, "w") as output_handle, open(
    snakemake.output.mapping_table, "w"
) as mapping_table_handle:
    for record in SeqIO.parse(snakemake.input[0], "fasta"):
        # Skip short contigs instead of stopping at the first one: the input
        # is not guaranteed to be sorted by decreasing length, and stopping
        # would silently discard every later contig, long ones included.
        if len(record) < min_length:
            n_dropped += 1
            continue

        n_kept += 1

        old_name = record.id
        new_name = f"{sample}_{n_kept}"
        record.id = new_name
        # Clear the description so the FASTA header holds only the new name.
        record.description = ""

        SeqIO.write(record, output_handle, "fasta")

        mapping_table_handle.write(f"{new_name}\t{old_name}\n")

logging.info(
    "Renamed %d contigs; dropped %d contigs shorter than %s bp",
    n_kept,
    n_dropped,
    min_length,
)

0 comments on commit e7f1449

Please sign in to comment.