From b1d03dcd3b27a4bfcb099406622e1cae219a3f87 Mon Sep 17 00:00:00 2001 From: Vedanth Ramji <86137377+Vedanth-Ramji@users.noreply.github.com> Date: Wed, 21 Aug 2024 02:11:31 +0530 Subject: [PATCH] ADD code to give percentages & frequencies of perfect, strict, and loose hits (#65) get_rgi_hit_counts() computes the frequencu of perfect, strict, and loose hits after running RGI in db_harmonisation --- db_harmonisation/construct_megares_mapping.py | 8 ++++---- db_harmonisation/crude_db_harmonisation.py | 16 +++++++++++++++- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/db_harmonisation/construct_megares_mapping.py b/db_harmonisation/construct_megares_mapping.py index 45bdda2..dd7b3e6 100644 --- a/db_harmonisation/construct_megares_mapping.py +++ b/db_harmonisation/construct_megares_mapping.py @@ -93,8 +93,8 @@ def generate_missing_mappings_fasta(missing_mappings, megares_fasta): """ Get protein file for missing CDSs to pass to RGI and nucleotide file for missing contigs to pass to RGI """ - ofile1 = './megares_cds.fasta' - ofile2 = './megares_contigs.fasta' + ofile1 = './dbs/megares_cds.fasta' + ofile2 = './dbs/megares_contigs.fasta' with open(megares_fasta) as ifile, open(ofile1, 'w') as megares_cds, open(ofile2, 'w') as megares_contigs: for record in SeqIO.parse(ifile, 'fasta'): if record.id not in missing_mappings: @@ -127,7 +127,7 @@ def setup_for_rgi(): @TaskGenerator def get_cds_rgi_output(cds_fasta): - ofile = './megares_cds_rgi_output' + ofile = './mapping/megares_cds_rgi_output' subprocess.check_call([ 'rgi', 'main', @@ -142,7 +142,7 @@ def get_cds_rgi_output(cds_fasta): @TaskGenerator def get_contig_rgi_output(contig_fasta): - ofile = './megares_contigs_rgi_output' + ofile = './mapping/megares_contigs_rgi_output' subprocess.check_call([ 'rgi', 'main', diff --git a/db_harmonisation/crude_db_harmonisation.py b/db_harmonisation/crude_db_harmonisation.py index 59ff409..7192616 100644 --- a/db_harmonisation/crude_db_harmonisation.py +++ b/db_harmonisation/crude_db_harmonisation.py @@ -109,6 +109,18 @@ def run_rgi(fa): def move_mappings_to_argnorm(aro_mapping): shutil.copy(aro_mapping, '../argnorm/data') +@TaskGenerator +def get_rgi_hit_counts(): + import pandas as pd + + dfs = [] + for dir in os.listdir('./mapping'): + if 'rgi.txt' in dir or 'rgi_output.txt' in dir: + dfs.append(pd.read_csv(f'./mapping/{dir}', sep='\t')) + + comb_df = pd.concat(dfs) + comb_df.to_csv('./mapping/combined_ARO_mapping.tsv', sep='\t') + # Calling tasks create_out_dirs() barrier() @@ -122,4 +134,6 @@ def move_mappings_to_argnorm(aro_mapping): ]: move_mappings_to_argnorm(run_rgi(db)) construct_megares() -get_groot_aro_mapping() \ No newline at end of file +get_groot_aro_mapping() +barrier() +get_rgi_hit_counts()