Skip to content

Commit 08be82f

Browse files
committed
Simplified code to run MAFFT and verify results.
1 parent 3472b9d commit 08be82f

File tree

5 files changed

+101
-102
lines changed

5 files changed

+101
-102
lines changed

CHEWBBACA/AlleleCallEvaluator/evaluate_calls.py

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -419,7 +419,7 @@ def main(input_files, schema_directory, output_directory, annotations,
419419

420420
protein_files = [r[1] for r in results]
421421

422-
# Align sequences with MAFFT
422+
# Run MAFFT to compute MSA
423423
print('\nDetermining the MSA for each locus...')
424424
mafft_outdir = fo.join_paths(temp_directory, ['alignment_files'])
425425
fo.create_directory(mafft_outdir)
@@ -436,15 +436,23 @@ def main(input_files, schema_directory, output_directory, annotations,
436436
common_args,
437437
mw.call_mafft)
438438

439-
results = mo.map_async_parallelizer(inputs,
440-
mo.function_helper,
441-
cpu_cores,
442-
show_progress=True)
439+
mafft_results = mo.map_async_parallelizer(inputs,
440+
mo.function_helper,
441+
cpu_cores,
442+
show_progress=True)
443+
444+
# Identify cases where MAFFT failed
445+
mafft_failed = [r[0] for r in mafft_results if r[1] is False]
446+
if len(mafft_failed) > 0:
447+
print(f'\nCould not determine MSA for {len(mafft_failed)} loci.')
448+
449+
# Get files that were created by MAFFT
450+
mafft_successful = [r[0] for r in mafft_results if r[1] is True]
443451

444452
print('\nCreating file with the full cgMLST alignment...', end='')
445453
# Concatenate all alignment files and index with BioPython
446454
concat_aln = fo.join_paths(mafft_outdir, ['cgMLST_concat.fasta'])
447-
fo.concatenate_files(mafft_outfiles, concat_aln)
455+
fo.concatenate_files(mafft_successful, concat_aln)
448456
# Index file
449457
concat_index = fao.index_fasta(concat_aln)
450458
sample_alignment_files = []
@@ -495,25 +503,15 @@ def main(input_files, schema_directory, output_directory, annotations,
495503
report_html_file = fo.join_paths(output_directory, [ct.ALLELECALL_REPORT_BASENAME])
496504
fo.write_to_file(report_html, report_html_file, 'w', '\n')
497505

498-
# Copy JS bundle to output directory
506+
# Copy the JS bundle to the output directory
507+
# When chewBBACA is installed
499508
script_path = os.path.dirname(os.path.abspath(__file__))
500-
parent_dir = os.path.dirname(script_path)
501-
try:
502-
fo.copy_file(fo.join_paths(script_path,
503-
['report_template_components',
504-
'src',
505-
'bundles',
506-
'AlleleCallEvaluator',
507-
'report_bundle.js']),
508-
output_directory)
509-
except Exception as e:
510-
fo.copy_file(fo.join_paths(parent_dir,
511-
['report_template_components',
512-
'src',
513-
'bundles',
514-
'AlleleCallEvaluator',
515-
'report_bundle.js']),
516-
output_directory)
509+
# For development
510+
if script_path.endswith('CHEWBBACA') is False:
511+
script_path = os.path.dirname(script_path)
512+
513+
fo.copy_file(fo.join_paths(script_path, [ct.ALLELECALL_EVALUATOR_BUNDLE]),
514+
output_directory)
517515

518516
# Delete all temporary files
519517
fo.delete_directory(temp_directory)

CHEWBBACA/SchemaEvaluator/evaluate_schema.py

Lines changed: 31 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -502,23 +502,27 @@ def locus_report(locus_file, locus_data, annotation_columns,
502502
mafft_outfile = os.path.basename(mafft_infile)
503503
mafft_outfile = mafft_outfile.replace('.fasta', '_aligned.fasta')
504504
mafft_outfile = fo.join_paths(output_directory, [mafft_outfile])
505-
mafft_outfile_exists = mw.call_mafft(mafft_infile, mafft_outfile)
506-
# Get MSA data
507-
alignment_text = fo.read_file(mafft_outfile)
508-
msa_data['sequences'] = alignment_text
509-
510-
# Get Tree data
511-
# Get the phylocanvas data
512-
tree_file = mafft_outfile.replace('_aligned.fasta', '.fasta.tree')
513-
phylo_data = fo.read_file(tree_file)
514-
515-
# Start by substituting greatest value to avoid substituting
516-
# smaller values contained in greater values
517-
for i in range(locus_data[1], 0, -1):
518-
phylo_data = phylo_data.replace(f'{i}_', '')
519-
520-
phylo_data = phylo_data.replace('\n', '')
521-
phylo_data = {"phylo_data": phylo_data}
505+
# Run MAFFT to compute MSA
506+
mafft_results = mw.call_mafft(mafft_infile, mafft_outfile)
507+
if mafft_results[1] is True:
508+
# Get MSA data
509+
alignment_text = fo.read_file(mafft_outfile)
510+
msa_data['sequences'] = alignment_text
511+
512+
# Get Tree data
513+
# Get the phylocanvas data
514+
tree_file = mafft_outfile.replace('_aligned.fasta', '.fasta.tree')
515+
phylo_text = fo.read_file(tree_file)
516+
517+
# Start by substituting greatest value to avoid substituting
518+
# smaller values contained in greater values
519+
for i in range(locus_data[1], 0, -1):
520+
phylo_text = phylo_text.replace(f'{i}_', '')
521+
522+
phylo_text = phylo_text.replace('\n', '')
523+
phylo_data['phylo_data'] = phylo_text
524+
else:
525+
print(f'Could not compute MSA and phylogenetic tree for locus {locus}')
522526

523527
locus_columns = ct.LOCUS_COLUMNS.format(minimum_length,
524528
locus_data[14],
@@ -772,50 +776,20 @@ def main(schema_directory, output_directory, genes_list, annotations,
772776

773777
# Copy the JS bundle files to the respective directories
774778
# JS bundle used by schema report
775-
script_path = os.path.dirname(os.path.abspath(__file__))
776-
parent_dir = os.path.dirname(script_path)
777779
# When chewBBACA is installed
778-
try:
779-
shutil.copy(fo.join_paths(script_path,
780-
['report_template_components',
781-
'src',
782-
'bundles',
783-
'SchemaEvaluator',
784-
'schema_report',
785-
'report_bundle.js']),
786-
output_directory)
780+
script_path = os.path.dirname(os.path.abspath(__file__))
787781
# For development
788-
except Exception as e:
789-
shutil.copy(fo.join_paths(parent_dir,
790-
['report_template_components',
791-
'src',
792-
'bundles',
793-
'SchemaEvaluator',
794-
'schema_report',
795-
'report_bundle.js']),
796-
output_directory)
782+
if script_path.endswith('CHEWBBACA') is False:
783+
script_path = os.path.dirname(script_path)
784+
785+
shutil.copy(fo.join_paths(script_path, [ct.SCHEMA_EVALUATOR_SCHEMA_BUNDLE]),
786+
output_directory)
787+
797788
if loci_reports is True:
798-
# JS bundle used by loci reports
799-
try:
800-
shutil.copy(fo.join_paths(script_path,
801-
['report_template_components',
802-
'src',
803-
'bundles',
804-
'SchemaEvaluator',
805-
'loci_reports',
806-
'report_bundle.js']),
807-
html_dir)
808-
except Exception as e:
809-
shutil.copy(fo.join_paths(parent_dir,
810-
['report_template_components',
811-
'src',
812-
'bundles',
813-
'SchemaEvaluator',
814-
'loci_reports',
815-
'report_bundle.js']),
816-
html_dir)
789+
shutil.copy(fo.join_paths(script_path, [ct.SCHEMA_EVALUATOR_LOCI_BUNDLE]),
790+
html_dir)
817791

818792
# Delete all temporary files
819793
fo.delete_directory(temp_directory)
820794

821-
print(f'\nResults available in {output_directory}.')
795+
print(f'\nResults available in {os.path.abspath(output_directory)}.')

CHEWBBACA/utils/blast_wrapper.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -48,15 +48,17 @@ def make_blast_db(makeblastdb_path, input_fasta, output_path, db_type,
4848
A list with the warnings and errors raised by BLAST.
4949
"""
5050
# use '-parse-seqids' to be able to retrieve/align sequences by identifier
51-
blastdb_cmd = [makeblastdb_path, '-in', input_fasta,
51+
makedb_cmd = [makeblastdb_path, '-in', input_fasta,
5252
'-out', output_path, '-parse_seqids',
5353
'-dbtype', db_type]
5454

55-
makedb_cmd = subprocess.Popen(blastdb_cmd,
56-
stdout=subprocess.PIPE,
57-
stderr=subprocess.PIPE)
55+
makedb_process = subprocess.Popen(makedb_cmd,
56+
stdout=subprocess.PIPE,
57+
stderr=subprocess.PIPE)
58+
59+
#stdout, stderr = makedb_process.communicate()
5860

59-
stderr = makedb_cmd.stderr.readlines()
61+
stderr = makedb_process.stderr.readlines()
6062

6163
# ignore errors/warnings provided to `ignore`
6264
if len(stderr) > 0:

CHEWBBACA/utils/constants.py

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414

1515
import sys
16+
import shutil
1617
import platform
1718

1819

@@ -124,12 +125,9 @@
124125
BLAST_MAJOR = 2
125126
BLAST_MINOR = 9
126127

127-
# BLASTp and makeblastdb aliases for Linux and Windows
128-
BLASTP_ALIAS = 'blastp.exe' if platform.system() == 'Windows' else 'blastp'
129-
MAKEBLASTDB_ALIAS = 'makeblastdb.exe' if platform.system() == 'Windows' else 'makeblastdb'
130-
131-
# Prodigal alias (must be in PATH)
132-
PRODIGAL_PATH = 'prodigal'
128+
# Paths to BLASTp and makeblastdb executables in Linux and Windows
129+
BLASTP_ALIAS = 'blastp.exe' if platform.system() == 'Windows' else shutil.which('blastp')
130+
MAKEBLASTDB_ALIAS = 'makeblastdb.exe' if platform.system() == 'Windows' else shutil.which('makeblastdb')
133131

134132
# BLAST warnings to be ignored
135133
IGNORE_RAISED = ['Warning: [blastp] To obtain better run time '
@@ -138,6 +136,9 @@
138136
'<OUT_FILE_NAME> and use <OUT_FILE_NAME> as the '
139137
'argument to -seqidlist']
140138

139+
# Path to MAFFT executable
140+
MAFFT_ALIAS = shutil.which('mafft')
141+
141142
# Replacements for genome and loci identifiers
142143
CHAR_REPLACEMENTS = [("|", "_"), ("_", "-"), ("(", ""),
143144
(")", ""), ("'", ""), ("\"", ""),
@@ -306,6 +307,15 @@
306307
PRESENCE_ABSENCE_BASENAME = 'presence_absence.tsv'
307308
MISSING_LOCI_BASENAME = 'missing_loci_stats.tsv'
308309

310+
# Relative path to the JS bundles used by SchemaEvaluator
311+
# Main page
312+
SCHEMA_EVALUATOR_SCHEMA_BUNDLE = 'report_template_components/src/bundles/SchemaEvaluator/schema_report/report_bundle.js'
313+
# Loci pages
314+
SCHEMA_EVALUATOR_LOCI_BUNDLE = 'report_template_components/src/bundles/SchemaEvaluator/loci_reports/report_bundle.js'
315+
316+
# Relative path to the JS bundle used by AlleleCallEvaluator
317+
ALLELECALL_EVALUATOR_BUNDLE = 'report_template_components/src/bundles/AlleleCallEvaluator/report_bundle.js'
318+
309319
# Do not use list of strings as constants if the strings include formatting
310320
# placeholders. Multiple references to the list of strings will have the same
311321
# id and altering the strings with format will not change the list id. In

CHEWBBACA/utils/mafft_wrapper.py

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -13,35 +13,50 @@
1313

1414

1515
import os
16-
import shutil
1716
import subprocess
1817

1918
try:
20-
from utils import iterables_manipulation as im
19+
from utils import (constants as ct,
20+
iterables_manipulation as im)
2121
except ModuleNotFoundError:
22-
from CHEWBBACA.utils import iterables_manipulation as im
22+
from CHEWBBACA.utils import (constants as ct,
23+
iterables_manipulation as im)
2324

2425

2526
def call_mafft(input_file, output_file):
26-
"""Call MAFFT to generate an alignment.
27+
"""Call MAFFT to compute a MSA.
2728
2829
Parameters
2930
----------
3031
input_file : str
3132
Path to a FASTA file with the sequences to align.
33+
output_file : str
34+
Path to the output file created by MAFFT.
3235
3336
Returns
3437
-------
35-
Path to the file with the computed MSA if successful, False otherwise.
38+
output_file : str
39+
Path to the output file.
40+
outfile_exists : bool
41+
True if the output file was created, False otherwise.
42+
stdout : bytes
43+
MAFFT stdout.
44+
stderr : bytes
45+
MAFFT stderr.
46+
47+
True if the output file was created, False otherwise.
3648
"""
37-
mafft_cmd = [shutil.which('mafft'), '--thread', '1', '--treeout', '--retree', '1',
49+
mafft_cmd = [ct.MAFFT_ALIAS, '--thread', '1', '--treeout', '--retree', '1',
3850
'--maxiterate', '0', input_file, '>', output_file]
3951
mafft_cmd = ' '.join(mafft_cmd)
4052

53+
# Must include subprocess.PIPE to get stdout and stderr
4154
mafft_cmd = subprocess.Popen(mafft_cmd,
42-
shell=True,
4355
stdout=subprocess.PIPE,
44-
stderr=subprocess.PIPE)
45-
mafft_cmd.wait()
56+
stderr=subprocess.PIPE,
57+
shell=True)
58+
stdout, stderr = mafft_cmd.communicate()
4659

47-
return os.path.exists(output_file)
60+
outfile_exists = os.path.exists(output_file)
61+
62+
return [output_file, outfile_exists, stdout, stderr]

0 commit comments

Comments
 (0)