Skip to content

Commit

Permalink
ADD code to give percentages & frequencies of perfect, strict, and lo…
Browse files Browse the repository at this point in the history
…ose hits

get_rgi_hit_counts() prints the frequencies & percentages of perfect, strict, and loose hits after running RGI in db_harmonisation
  • Loading branch information
Vedanth-Ramji committed Aug 19, 2024
1 parent 6576179 commit f8d93d3
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 5 deletions.
8 changes: 4 additions & 4 deletions db_harmonisation/construct_megares_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def generate_missing_mappings_fasta(missing_mappings, megares_fasta):
"""
Get protein file for missing CDSs to pass to RGI and nucleotide file for missing contigs to pass to RGI
"""
ofile1 = './megares_cds.fasta'
ofile2 = './megares_contigs.fasta'
ofile1 = './dbs/megares_cds.fasta'
ofile2 = './dbs/megares_contigs.fasta'
with open(megares_fasta) as ifile, open(ofile1, 'w') as megares_cds, open(ofile2, 'w') as megares_contigs:
for record in SeqIO.parse(ifile, 'fasta'):
if record.id not in missing_mappings:
Expand Down Expand Up @@ -127,7 +127,7 @@ def setup_for_rgi():

@TaskGenerator
def get_cds_rgi_output(cds_fasta):
ofile = './megares_cds_rgi_output'
ofile = './mapping/megares_cds_rgi_output'
subprocess.check_call([
'rgi',
'main',
Expand All @@ -142,7 +142,7 @@ def get_cds_rgi_output(cds_fasta):

@TaskGenerator
def get_contig_rgi_output(contig_fasta):
ofile = './megares_contigs_rgi_output'
ofile = './mapping/megares_contigs_rgi_output'
subprocess.check_call([
'rgi',
'main',
Expand Down
19 changes: 18 additions & 1 deletion db_harmonisation/crude_db_harmonisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from Bio.Seq import translate, Seq
from construct_megares_mapping import construct_megares
from construct_groot_mappings import get_groot_aro_mapping
import pandas as pd

@TaskGenerator
def create_out_dirs():
Expand Down Expand Up @@ -116,6 +117,20 @@ def run_rgi(fa):
def move_mappings_to_argnorm(aro_mapping):
shutil.copy(aro_mapping, '../argnorm/data')

@TaskGenerator
def get_rgi_hit_counts():
dfs = []

for dir in os.listdir('./mapping'):
if 'rgi.txt' in dir or 'rgi_output.txt' in dir:
dfs.append(pd.read_csv(f'./mapping/{dir}', sep='\t'))

comb_df = pd.concat(dfs)
comb_df.to_csv('./mapping/combined_ARO_mapping.tsv', sep='\t')

for i in set(comb_df['Cut_Off']):
print(f"{i} hits: {list(comb_df['Cut_Off']).count(i) / len(list(comb_df['Cut_Off'])) * 100}% ({list(comb_df['Cut_Off']).count(i)})")

# Calling tasks
create_out_dirs()
barrier()
Expand All @@ -129,4 +144,6 @@ def move_mappings_to_argnorm(aro_mapping):
]:
move_mappings_to_argnorm(run_rgi(db))
construct_megares()
get_groot_aro_mapping()
get_groot_aro_mapping()
barrier()
get_rgi_hit_counts()

0 comments on commit f8d93d3

Please sign in to comment.