diff --git a/db_harmonisation/construct_megares_mapping.py b/db_harmonisation/construct_megares_mapping.py index 45bdda2..dd7b3e6 100644 --- a/db_harmonisation/construct_megares_mapping.py +++ b/db_harmonisation/construct_megares_mapping.py @@ -93,8 +93,8 @@ def generate_missing_mappings_fasta(missing_mappings, megares_fasta): """ Get protein file for missing CDSs to pass to RGI and nucleotide file for missing contigs to pass to RGI """ - ofile1 = './megares_cds.fasta' - ofile2 = './megares_contigs.fasta' + ofile1 = './dbs/megares_cds.fasta' + ofile2 = './dbs/megares_contigs.fasta' with open(megares_fasta) as ifile, open(ofile1, 'w') as megares_cds, open(ofile2, 'w') as megares_contigs: for record in SeqIO.parse(ifile, 'fasta'): if record.id not in missing_mappings: @@ -127,7 +127,7 @@ def setup_for_rgi(): @TaskGenerator def get_cds_rgi_output(cds_fasta): - ofile = './megares_cds_rgi_output' + ofile = './mapping/megares_cds_rgi_output' subprocess.check_call([ 'rgi', 'main', @@ -142,7 +142,7 @@ def get_cds_rgi_output(cds_fasta): @TaskGenerator def get_contig_rgi_output(contig_fasta): - ofile = './megares_contigs_rgi_output' + ofile = './mapping/megares_contigs_rgi_output' subprocess.check_call([ 'rgi', 'main', diff --git a/db_harmonisation/crude_db_harmonisation.py b/db_harmonisation/crude_db_harmonisation.py index 9950f50..cc4886b 100644 --- a/db_harmonisation/crude_db_harmonisation.py +++ b/db_harmonisation/crude_db_harmonisation.py @@ -9,6 +9,7 @@ from Bio.Seq import translate, Seq from construct_megares_mapping import construct_megares from construct_groot_mappings import get_groot_aro_mapping +import pandas as pd @TaskGenerator def create_out_dirs(): @@ -116,6 +117,20 @@ def run_rgi(fa): def move_mappings_to_argnorm(aro_mapping): shutil.copy(aro_mapping, '../argnorm/data') +@TaskGenerator +def get_rgi_hit_counts(): + dfs = [] + + for dir in os.listdir('./mapping'): + if 'rgi.txt' in dir or 'rgi_output.txt' in dir: + dfs.append(pd.read_csv(f'./mapping/{dir}', sep='\t')) + + comb_df = pd.concat(dfs) + comb_df.to_csv('./mapping/combined_ARO_mapping.tsv', sep='\t') + + for i in set(comb_df['Cut_Off']): + print(f"{i} hits: {list(comb_df['Cut_Off']).count(i) / len(list(comb_df['Cut_Off'])) * 100}% ({list(comb_df['Cut_Off']).count(i)})") + # Calling tasks create_out_dirs() barrier() @@ -129,4 +144,6 @@ def move_mappings_to_argnorm(aro_mapping): ]: move_mappings_to_argnorm(run_rgi(db)) construct_megares() -get_groot_aro_mapping() \ No newline at end of file +get_groot_aro_mapping() +barrier() +get_rgi_hit_counts()