Skip to content

Commit

Permalink
ADD code to give percentages & frequencies of perfect, strict, and lo…
Browse files Browse the repository at this point in the history
…ose hits (#65)

get_rgi_hit_counts() computes the frequency of perfect, strict, and loose hits after running RGI in db_harmonisation
  • Loading branch information
Vedanth-Ramji committed Aug 20, 2024
1 parent f56a2dd commit b1d03dc
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 5 deletions.
8 changes: 4 additions & 4 deletions db_harmonisation/construct_megares_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ def generate_missing_mappings_fasta(missing_mappings, megares_fasta):
"""
Get protein file for missing CDSs to pass to RGI and nucleotide file for missing contigs to pass to RGI
"""
ofile1 = './megares_cds.fasta'
ofile2 = './megares_contigs.fasta'
ofile1 = './dbs/megares_cds.fasta'
ofile2 = './dbs/megares_contigs.fasta'
with open(megares_fasta) as ifile, open(ofile1, 'w') as megares_cds, open(ofile2, 'w') as megares_contigs:
for record in SeqIO.parse(ifile, 'fasta'):
if record.id not in missing_mappings:
Expand Down Expand Up @@ -127,7 +127,7 @@ def setup_for_rgi():

@TaskGenerator
def get_cds_rgi_output(cds_fasta):
ofile = './megares_cds_rgi_output'
ofile = './mapping/megares_cds_rgi_output'
subprocess.check_call([
'rgi',
'main',
Expand All @@ -142,7 +142,7 @@ def get_cds_rgi_output(cds_fasta):

@TaskGenerator
def get_contig_rgi_output(contig_fasta):
ofile = './megares_contigs_rgi_output'
ofile = './mapping/megares_contigs_rgi_output'
subprocess.check_call([
'rgi',
'main',
Expand Down
16 changes: 15 additions & 1 deletion db_harmonisation/crude_db_harmonisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,18 @@ def run_rgi(fa):
def move_mappings_to_argnorm(aro_mapping):
    """
    Copy the file at `aro_mapping` to '../argnorm/data'.

    The file is copied with `shutil.copy`, not moved, so the source stays
    where it is. The destination is resolved relative to the current working
    directory.
    """
    # NOTE(review): presumably '../argnorm/data' is an existing directory, in
    # which case the copy keeps the source file's base name -- confirm.
    argnorm_data = '../argnorm/data'
    shutil.copy(aro_mapping, argnorm_data)

@TaskGenerator
def get_rgi_hit_counts():
    """
    Combine every RGI output table found in './mapping' into a single TSV.

    Each entry of './mapping' whose name contains 'rgi.txt' or
    'rgi_output.txt' is read as a tab-separated table; the tables are
    concatenated and written to './mapping/combined_ARO_mapping.tsv'.

    Despite the function's name, this block only builds the combined table;
    it does not itself compute any per-category hit counts.

    Raises:
        ValueError: if './mapping' contains no matching RGI output file.
    """
    import pandas as pd

    mapping_dir = './mapping'
    rgi_markers = ('rgi.txt', 'rgi_output.txt')

    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order of the combined table reproducible between runs.
    rgi_files = [
        fname
        for fname in sorted(os.listdir(mapping_dir))
        if any(marker in fname for marker in rgi_markers)
    ]

    # pd.concat() on an empty list fails with an opaque "No objects to
    # concatenate"; raise the same exception type with an actionable message.
    if not rgi_files:
        raise ValueError(f'No RGI output files found in {mapping_dir}')

    tables = [
        pd.read_csv(os.path.join(mapping_dir, fname), sep='\t')
        for fname in rgi_files
    ]

    combined = pd.concat(tables)
    # NOTE(review): the index is written as-is, so row labels restart for each
    # source file; kept unchanged in case a consumer depends on this layout.
    combined.to_csv(os.path.join(mapping_dir, 'combined_ARO_mapping.tsv'), sep='\t')

# Calling tasks
create_out_dirs()
barrier()
Expand All @@ -122,4 +134,6 @@ def move_mappings_to_argnorm(aro_mapping):
]:
move_mappings_to_argnorm(run_rgi(db))
construct_megares()
get_groot_aro_mapping()
get_groot_aro_mapping()
barrier()
get_rgi_hit_counts()

0 comments on commit b1d03dc

Please sign in to comment.