From f7c566f6c70cad832943b5fa2067237e75776f38 Mon Sep 17 00:00:00 2001 From: cpauvert Date: Wed, 17 Apr 2024 16:15:21 +0200 Subject: [PATCH] explicitly state headers and comments for bakta annotations this fixes gitlab clavellab/genome-assembly#14 --- workflow/scripts/check_tRNAs_5S.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/workflow/scripts/check_tRNAs_5S.py b/workflow/scripts/check_tRNAs_5S.py index 16c13f8..70ead4d 100644 --- a/workflow/scripts/check_tRNAs_5S.py +++ b/workflow/scripts/check_tRNAs_5S.py @@ -4,7 +4,11 @@ sys.stderr = open(snakemake.log[0], "w") # Read Bakta tabular annotations -annotations = pd.read_table(snakemake.input[0], sep="\t", header=2) +# Header of TSV files changed between v1.6.3 and v1.9.3 +# https://git.rwth-aachen.de/clavellab/genome-assembly/-/issues/14 +# So skip "#" lines and name the columns; header=None because header=0 would drop the first data row. +annotations = pd.read_table(snakemake.input[0], sep="\t", header=None, comment = "#", + names = ["Sequence Id","Start","Stop","Strand","Locus Tag","Gene","Product","DbXrefs"]) # Casting the column as string to fix a AttributeError when no gene names are found # Can only use .str accessor with string values annotations['Gene']=annotations['Gene'].astype(str)