diff --git a/README.md b/README.md index 7ca5f929..a13e4a31 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ autoreconf -i make [sudo] make install cd python -[sudo] python3 -m pip install . +[sudo] python3 -m pip install [--prefix=$PREFIX] . ``` Use `sudo` to install Gubbins system-wide. If you don't have the permissions, run `configure` with a prefix to install Gubbins in your home directory. @@ -132,6 +132,17 @@ Gubbins is free software, licensed under [GPLv2](https://github.com/nickjcrouche ## Feedback/Issues There is no specific support for development or maintenance of Gubbins. However, we will try to help you out if you report any issues about usage of the software to the [issues page](https://github.com/nickjcroucher/gubbins/issues). +## Development plan +Version 3 incorporates a number of features that were explicitly requested by users (e.g. plotting functions), improved the algorithm's accuracy (e.g. using joint ancestral reconstruction) and were commonly used in published analyses (e.g. using IQTREE2 for phylogeny construction). + +Future development will prioritise: +- More efficient phylogenetic processing with modern python libraries +- Parallelisation of recombination searches +- Faster sequence reconstruction through hardware acceleration +- Extension of existing analyses using phylogenetic placement + +If you believe there are other improvements that could be added, please describe them on the [issues page](https://github.com/nickjcroucher/gubbins/issues) and tag the suggestion as an "enhancement". + ## Citation If you use this software please cite: [Croucher N. J., Page A. J., Connor T. R., Delaney A. J., Keane J. A., Bentley S. D., Parkhill J., Harris S.R. diff --git a/python/gubbins/common.py b/python/gubbins/common.py index ece5fc2f..c9e3bf39 100644 --- a/python/gubbins/common.py +++ b/python/gubbins/common.py @@ -744,13 +744,13 @@ def return_algorithm_choices(args,i): def return_algorithm(algorithm_choice, model, input_args, node_labels = None, extra = None): initialised_algorithm = None if algorithm_choice == "fasttree": - initialised_algorithm = FastTree(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra) + initialised_algorithm = FastTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "raxml": - initialised_algorithm = RAxML(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) + initialised_algorithm = RAxML(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "raxmlng": - initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) + initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "iqtree": - initialised_algorithm = IQTree(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra) + initialised_algorithm = IQTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra) elif algorithm_choice == "rapidnj": initialised_algorithm = RapidNJ(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "star": diff --git a/python/gubbins/run_gubbins.py b/python/gubbins/run_gubbins.py index cbf8080a..96e5f544 100755 --- a/python/gubbins/run_gubbins.py +++ b/python/gubbins/run_gubbins.py @@ -76,6 +76,8 @@ def parse_input_args(): default = False, action = 'store_true') treeGroup.add_argument('--sh-test', help='Perform an SH test of node likelihoods', default = False, action = 'store_true') + treeGroup.add_argument('--seed', help='Set seed for reproducibility of analysis', + default = None, type = int) modelGroup = parser.add_argument_group('Nucleotide substitution model options') modelGroup.add_argument('--model', '-M', help='Nucleotide substitution model (not all available for all ' diff --git a/python/gubbins/tests/test_dependencies.py b/python/gubbins/tests/test_dependencies.py index 0951842e..ebf80e05 100644 --- a/python/gubbins/tests/test_dependencies.py +++ b/python/gubbins/tests/test_dependencies.py @@ -79,6 +79,18 @@ def test_fasttree(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_fasttree_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + # Test resuming a default analysis def test_fasttree_resume(self): exit_code = 1 @@ -122,6 +134,18 @@ def test_iqtree(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_iqtree_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + def test_iqtree_custom_model(self): exit_code = 1 parser = run_gubbins.parse_input_args() @@ -246,6 +270,18 @@ def test_raxml(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_raxml_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "raxml", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + def test_raxml_custom_model(self): exit_code = 1 parser = run_gubbins.parse_input_args() @@ -301,6 +337,19 @@ def test_raxmlng(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_raxmlng_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "raxmlng", + "--model","GTR", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + def test_raxmlng_custom_model(self): exit_code = 1 parser = run_gubbins.parse_input_args() diff --git a/python/gubbins/tests/test_utils.py b/python/gubbins/tests/test_utils.py index ce2146a4..2ee42b90 100644 --- a/python/gubbins/tests/test_utils.py +++ b/python/gubbins/tests/test_utils.py @@ -107,3 +107,9 @@ def test_verbose_printer(self): printer.print(["AAA", "BBB"]) printed = f.getvalue() assert printed == "AAA-BBB\n" + + def test_seed(self): + set_seed_val = utils.set_seed(42) + assert set_seed_val == "42" + random_seed_val = utils.set_seed(None) + assert(int(random_seed_val) < 10001) diff --git a/python/gubbins/treebuilders.py b/python/gubbins/treebuilders.py index 9e425373..01546e71 100644 --- a/python/gubbins/treebuilders.py +++ b/python/gubbins/treebuilders.py @@ -20,7 +20,6 @@ import sys import os import subprocess -from random import randint from Bio import SeqIO @@ -129,7 +128,7 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena class FastTree: """Class for operations with the FastTree executable""" - def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, additional_args = None): + def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', seed = None, verbose=False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -139,9 +138,10 @@ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, a self.alignment_suffix = ".snp_sites.aln" self.bootstrap = bootstrap self.additional_args = additional_args + self.seed = utils.set_seed(seed) # Identify executable - self.potential_executables = ["FastTree", "fasttree"] + self.potential_executables = ["FastTreeMP","fasttreeMP","FastTree", "fasttree"] self.executable = utils.choose_executable(self.potential_executables) if self.executable is None: sys.exit("No usable version of FastTree could be found.") @@ -164,6 +164,7 @@ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, a command.extend(["-gtr"]) else: command.extend([self.model]) + command.extend(["-seed",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) @@ -256,7 +257,7 @@ def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str: class IQTree: """Class for operations with the IQTree executable""" - def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix="", verbose=False, use_best=False, additional_args = None): + def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, use_best=False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -271,6 +272,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix=" self.internal_node_prefix = internal_node_prefix self.bootstrap = bootstrap self.use_best = use_best + self.seed = utils.set_seed(seed) self.additional_args = additional_args # Construct base command @@ -303,6 +305,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix=" command.extend(["-m","GTR+G4"]) else: command.extend(["-m",self.model]) + command.extend(["-seed",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) @@ -432,7 +435,7 @@ def run_model_comparison(self, alignment_filename: str, basename: str) -> str: class RAxML: """Class for operations with the RAxML executable""" - def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_prefix="", verbose=False, additional_args = None): + def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -446,6 +449,7 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref self.alignment_suffix = ".phylip" self.internal_node_prefix = internal_node_prefix self.bootstrap = bootstrap + self.seed = utils.set_seed(seed) self.additional_args = additional_args self.single_threaded_executables = ['raxmlHPC-AVX2', 'raxmlHPC-AVX', 'raxmlHPC-SSE3', 'raxmlHPC'] @@ -465,9 +469,6 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref if self.threads > 1: command.extend(["-T", str(self.threads)]) - # Set a seed - command.extend(["-p",str(randint(0, 10000))]) - # Add flags command.extend(["-safe"]) if self.model == 'JC': @@ -482,6 +483,7 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref command.extend(["-m","GTRGAMMA"]) else: command.extend(["-m", self.model]) + command.extend(["-p",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) @@ -579,9 +581,7 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena command = self.base_command.copy() command.extend(["-s", alignment_filename, "-n", basename + ".bootstrapped_trees"]) command.extend(["-w",tmp]) - p_seed = str(randint(0, 10000)) - command.extend(["-p",p_seed]) - command.extend(["-x",p_seed]) + command.extend(["-x",self.seed]) command.extend(["-#",str(self.bootstrap)]) # Output if not self.verbose: @@ -592,8 +592,6 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena def sh_test(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str: """Runs a single branch support test""" command = self.base_command.copy() - p_seed = str(randint(0, 10000)) - command.extend(["-p",p_seed]) command.extend(["-f", "J"]) command.extend(["-s", alignment_filename, "-n", input_tree + ".sh_support"]) command.extend(["-t", input_tree]) @@ -610,7 +608,7 @@ def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str: class RAxMLNG: """Class for operations with the RAxML executable""" - def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix = "", verbose = False, additional_args = None): + def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix = "", verbose = False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -624,6 +622,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix = self.alignment_suffix = ".phylip" self.internal_node_prefix = internal_node_prefix self.bootstrap = bootstrap + self.seed = utils.set_seed(seed) self.additional_args = additional_args self.single_threaded_executables = ['raxml-ng'] @@ -655,6 +654,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix = command.extend(["GTR+G"]) else: command.extend([self.model]) + command.extend(["--seed",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) diff --git a/python/gubbins/utils.py b/python/gubbins/utils.py index 3b70faa5..fdc4dbd3 100644 --- a/python/gubbins/utils.py +++ b/python/gubbins/utils.py @@ -23,6 +23,7 @@ import re import numpy as np import collections +from random import randint try: from multiprocessing.managers import SharedMemoryManager NumpyShared = collections.namedtuple('NumpyShared', ('name', 'shape', 'dtype')) @@ -195,3 +196,11 @@ def extend_args(var,add): var.extend([add]) var = " ".join(var) return var + +def set_seed(seed): + """Set seed when specified""" + if seed is None: + seed = str(randint(0, 10000)) + else: + seed = str(seed) + return seed