Skip to content

Commit

Permalink
RFCT remove hits calculation and import pandas in get_rgi_hit_counts…
Browse files Browse the repository at this point in the history
…() rather than outside

Code to display percentages of perfect, strict, and loose hits was removed as it is a trivial calculation.

'pandas' is now imported inside get_rgi_hit_counts() rather than at module level, to avoid slowing down other operations that do not need it.
  • Loading branch information
Vedanth-Ramji committed Aug 20, 2024
1 parent f8d93d3 commit 8e35386
Showing 1 changed file with 2 additions and 5 deletions.
7 changes: 2 additions & 5 deletions db_harmonisation/crude_db_harmonisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
from Bio.Seq import translate, Seq
from construct_megares_mapping import construct_megares
from construct_groot_mappings import get_groot_aro_mapping
import pandas as pd

@TaskGenerator
def create_out_dirs():
Expand Down Expand Up @@ -119,18 +118,16 @@ def move_mappings_to_argnorm(aro_mapping):

@TaskGenerator
def get_rgi_hit_counts():
dfs = []
import pandas as pd

dfs = []
for dir in os.listdir('./mapping'):
if 'rgi.txt' in dir or 'rgi_output.txt' in dir:
dfs.append(pd.read_csv(f'./mapping/{dir}', sep='\t'))

comb_df = pd.concat(dfs)
comb_df.to_csv('./mapping/combined_ARO_mapping.tsv', sep='\t')

for i in set(comb_df['Cut_Off']):
print(f"{i} hits: {list(comb_df['Cut_Off']).count(i) / len(list(comb_df['Cut_Off'])) * 100}% ({list(comb_df['Cut_Off']).count(i)})")

# Calling tasks
create_out_dirs()
barrier()
Expand Down

0 comments on commit 8e35386

Please sign in to comment.