From 52a6885057577cd2d46814d3f94e973a7fcf80f5 Mon Sep 17 00:00:00 2001 From: Nick Croucher Date: Thu, 30 Nov 2023 11:52:21 +0000 Subject: [PATCH 1/5] Enable seed specification --- python/gubbins/common.py | 8 ++-- python/gubbins/run_gubbins.py | 2 + python/gubbins/tests/test_dependencies.py | 49 +++++++++++++++++++++++ python/gubbins/tests/test_utils.py | 6 +++ python/gubbins/treebuilders.py | 26 ++++++------ python/gubbins/utils.py | 9 +++++ 6 files changed, 83 insertions(+), 17 deletions(-) diff --git a/python/gubbins/common.py b/python/gubbins/common.py index ece5fc2f..c9e3bf39 100644 --- a/python/gubbins/common.py +++ b/python/gubbins/common.py @@ -744,13 +744,13 @@ def return_algorithm_choices(args,i): def return_algorithm(algorithm_choice, model, input_args, node_labels = None, extra = None): initialised_algorithm = None if algorithm_choice == "fasttree": - initialised_algorithm = FastTree(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra) + initialised_algorithm = FastTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "raxml": - initialised_algorithm = RAxML(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) + initialised_algorithm = RAxML(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "raxmlng": - initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) + initialised_algorithm = RAxMLNG(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "iqtree": - initialised_algorithm = IQTree(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra) + initialised_algorithm = IQTree(threads = input_args.threads, model = model, seed = input_args.seed, bootstrap = input_args.bootstrap, internal_node_prefix = node_labels, verbose = input_args.verbose, use_best = (model is None and input_args.best_model), additional_args = extra) elif algorithm_choice == "rapidnj": initialised_algorithm = RapidNJ(threads = input_args.threads, model = model, bootstrap = input_args.bootstrap, verbose = input_args.verbose, additional_args = extra) elif algorithm_choice == "star": diff --git a/python/gubbins/run_gubbins.py b/python/gubbins/run_gubbins.py index cbf8080a..96e5f544 100755 --- a/python/gubbins/run_gubbins.py +++ b/python/gubbins/run_gubbins.py @@ -76,6 +76,8 @@ def parse_input_args(): default = False, action = 'store_true') treeGroup.add_argument('--sh-test', help='Perform an SH test of node likelihoods', default = False, action = 'store_true') + treeGroup.add_argument('--seed', help='Set seed for reproducibility of analysis', + default = None, type = int) modelGroup = parser.add_argument_group('Nucleotide substitution model options') modelGroup.add_argument('--model', '-M', help='Nucleotide substitution model (not all available for all ' diff --git a/python/gubbins/tests/test_dependencies.py b/python/gubbins/tests/test_dependencies.py index 0951842e..ebf80e05 100644 --- a/python/gubbins/tests/test_dependencies.py +++ b/python/gubbins/tests/test_dependencies.py @@ -79,6 +79,18 @@ def test_fasttree(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_fasttree_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "fasttree", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + # Test resuming a default analysis def test_fasttree_resume(self): exit_code = 1 @@ -122,6 +134,18 @@ def test_iqtree(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_iqtree_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "iqtree", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + def test_iqtree_custom_model(self): exit_code = 1 parser = run_gubbins.parse_input_args() @@ -246,6 +270,18 @@ def test_raxml(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_raxml_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "raxml", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + def test_raxml_custom_model(self): exit_code = 1 parser = run_gubbins.parse_input_args() @@ -301,6 +337,19 @@ def test_raxmlng(self): self.cleanup('multiple_recombinations') assert exit_code == 0 + def test_raxmlng_seed(self): + exit_code = 1 + parser = run_gubbins.parse_input_args() + common.parse_and_run(parser.parse_args(["--tree-builder", "raxmlng", + "--model","GTR", + "--verbose", "--iterations", "3", + "--seed","42", + "--threads", "1", + os.path.join(data_dir, 'multiple_recombinations.aln')])) + exit_code = self.check_for_output_files('multiple_recombinations') + self.cleanup('multiple_recombinations') + assert exit_code == 0 + def test_raxmlng_custom_model(self): exit_code = 1 parser = run_gubbins.parse_input_args() diff --git a/python/gubbins/tests/test_utils.py b/python/gubbins/tests/test_utils.py index ce2146a4..2ee42b90 100644 --- a/python/gubbins/tests/test_utils.py +++ b/python/gubbins/tests/test_utils.py @@ -107,3 +107,9 @@ def test_verbose_printer(self): printer.print(["AAA", "BBB"]) printed = f.getvalue() assert printed == "AAA-BBB\n" + + def test_seed(self): + set_seed_val = utils.set_seed(42) + assert set_seed_val == "42" + random_seed_val = utils.set_seed(None) + assert(int(random_seed_val) < 10001) diff --git a/python/gubbins/treebuilders.py b/python/gubbins/treebuilders.py index 9e425373..bd2c02d4 100644 --- a/python/gubbins/treebuilders.py +++ b/python/gubbins/treebuilders.py @@ -20,7 +20,6 @@ import sys import os import subprocess -from random import randint from Bio import SeqIO @@ -129,7 +128,7 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena class FastTree: """Class for operations with the FastTree executable""" - def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, additional_args = None): + def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', seed = None, verbose=False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -139,6 +138,7 @@ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, a self.alignment_suffix = ".snp_sites.aln" self.bootstrap = bootstrap self.additional_args = additional_args + self.seed = utils.set_seed(seed) # Identify executable self.potential_executables = ["FastTree", "fasttree"] @@ -164,6 +164,7 @@ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', verbose=False, a command.extend(["-gtr"]) else: command.extend([self.model]) + command.extend(["-seed",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) @@ -256,7 +257,7 @@ def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str: class IQTree: """Class for operations with the IQTree executable""" - def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix="", verbose=False, use_best=False, additional_args = None): + def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, use_best=False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -271,6 +272,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix=" self.internal_node_prefix = internal_node_prefix self.bootstrap = bootstrap self.use_best = use_best + self.seed = utils.set_seed(seed) self.additional_args = additional_args # Construct base command @@ -303,6 +305,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix=" command.extend(["-m","GTR+G4"]) else: command.extend(["-m",self.model]) + command.extend(["-seed",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) @@ -432,7 +435,7 @@ def run_model_comparison(self, alignment_filename: str, basename: str) -> str: class RAxML: """Class for operations with the RAxML executable""" - def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_prefix="", verbose=False, additional_args = None): + def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, seed = None, internal_node_prefix="", verbose=False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -446,6 +449,7 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref self.alignment_suffix = ".phylip" self.internal_node_prefix = internal_node_prefix self.bootstrap = bootstrap + self.seed = utils.set_seed(seed) self.additional_args = additional_args self.single_threaded_executables = ['raxmlHPC-AVX2', 'raxmlHPC-AVX', 'raxmlHPC-SSE3', 'raxmlHPC'] @@ -465,9 +469,6 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref if self.threads > 1: command.extend(["-T", str(self.threads)]) - # Set a seed - command.extend(["-p",str(randint(0, 10000))]) - # Add flags command.extend(["-safe"]) if self.model == 'JC': @@ -482,6 +483,7 @@ def __init__(self, threads: 1, model='GTRCAT', bootstrap = 0, internal_node_pref command.extend(["-m","GTRGAMMA"]) else: command.extend(["-m", self.model]) + command.extend(["-p",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) @@ -579,9 +581,7 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena command = self.base_command.copy() command.extend(["-s", alignment_filename, "-n", basename + ".bootstrapped_trees"]) command.extend(["-w",tmp]) - p_seed = str(randint(0, 10000)) - command.extend(["-p",p_seed]) - command.extend(["-x",p_seed]) + command.extend(["-x",self.seed]) command.extend(["-#",str(self.bootstrap)]) # Output if not self.verbose: @@ -592,8 +592,6 @@ def bootstrapping_command(self, alignment_filename: str, input_tree: str, basena def sh_test(self, alignment_filename: str, input_tree: str, basename: str, tmp: str) -> str: """Runs a single branch support test""" command = self.base_command.copy() - p_seed = str(randint(0, 10000)) - command.extend(["-p",p_seed]) command.extend(["-f", "J"]) command.extend(["-s", alignment_filename, "-n", input_tree + ".sh_support"]) command.extend(["-t", input_tree]) @@ -610,7 +608,7 @@ def get_bootstrapped_trees_file(self, tmp: str, basename: str) -> str: class RAxMLNG: """Class for operations with the RAxML executable""" - def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix = "", verbose = False, additional_args = None): + def __init__(self, threads: 1, model: str, bootstrap = 0, seed = None, internal_node_prefix = "", verbose = False, additional_args = None): """Initialises the object""" self.verbose = verbose self.threads = threads @@ -624,6 +622,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix = self.alignment_suffix = ".phylip" self.internal_node_prefix = internal_node_prefix self.bootstrap = bootstrap + self.seed = utils.set_seed(seed) self.additional_args = additional_args self.single_threaded_executables = ['raxml-ng'] @@ -655,6 +654,7 @@ def __init__(self, threads: 1, model: str, bootstrap = 0, internal_node_prefix = command.extend(["GTR+G"]) else: command.extend([self.model]) + command.extend(["--seed",self.seed]) # Additional arguments if self.additional_args is not None: command.extend([self.additional_args]) diff --git a/python/gubbins/utils.py b/python/gubbins/utils.py index 3b70faa5..fdc4dbd3 100644 --- a/python/gubbins/utils.py +++ b/python/gubbins/utils.py @@ -23,6 +23,7 @@ import re import numpy as np import collections +from random import randint try: from multiprocessing.managers import SharedMemoryManager NumpyShared = collections.namedtuple('NumpyShared', ('name', 'shape', 'dtype')) @@ -195,3 +196,11 @@ def extend_args(var,add): var.extend([add]) var = " ".join(var) return var + +def set_seed(seed): + """Set seed when specified""" + if seed is None: + seed = str(randint(0, 10000)) + else: + seed = str(seed) + return seed From 9228a0d9e842b79a01a92ce230a72bda5e908ea2 Mon Sep 17 00:00:00 2001 From: Nick Croucher Date: Thu, 30 Nov 2023 13:19:08 +0000 Subject: [PATCH 2/5] Describe development plans --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index 7ca5f929..99c3727b 100644 --- a/README.md +++ b/README.md @@ -132,6 +132,17 @@ Gubbins is free software, licensed under [GPLv2](https://github.com/nickjcrouche ## Feedback/Issues There is no specific support for development or maintenance of Gubbins. However, we will try to help you out if you report any issues about usage of the software to the [issues page](https://github.com/nickjcroucher/gubbins/issues). +## Development plan +Version 3 incorporates a number of features that were explicitly requested by users (e.g. ), improved the algorithm's accuracy (e.g. using joint ancestral reconstruction) and were commonly used in published analyses (e.g. using IQTREE2 for phylogeny construction). + +Future development will prioritise: +- More efficient phylogenetic processing with modern python libraries +- Parallelisation of recombination searches +- Faster sequence reconstruction through hardware acceleration +- Extension of existing analyses using phylogenetic placement + +If you believe there are other improvements that could be added, please describe them on the [issues page](https://github.com/nickjcroucher/gubbins/issues) and tag the suggestion as an "enhancement". + ## Citation If you use this software please cite: [Croucher N. J., Page A. J., Connor T. R., Delaney A. J., Keane J. A., Bentley S. D., Parkhill J., Harris S.R. From b4e7a92598a59e99b80219d92170dec77345ea05 Mon Sep 17 00:00:00 2001 From: Nick Croucher Date: Thu, 30 Nov 2023 13:23:10 +0000 Subject: [PATCH 3/5] Add missing example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 99c3727b..1db862b6 100644 --- a/README.md +++ b/README.md @@ -133,7 +133,7 @@ Gubbins is free software, licensed under [GPLv2](https://github.com/nickjcrouche There is no specific support for development or maintenance of Gubbins. However, we will try to help you out if you report any issues about usage of the software to the [issues page](https://github.com/nickjcroucher/gubbins/issues). ## Development plan -Version 3 incorporates a number of features that were explicitly requested by users (e.g. ), improved the algorithm's accuracy (e.g. using joint ancestral reconstruction) and were commonly used in published analyses (e.g. using IQTREE2 for phylogeny construction). +Version 3 incorporates a number of features that were explicitly requested by users (e.g. plotting functions), improved the algorithm's accuracy (e.g. using joint ancestral reconstruction) and were commonly used in published analyses (e.g. using IQTREE2 for phylogeny construction). Future development will prioritise: - More efficient phylogenetic processing with modern python libraries From bf3bd21dd8716314c78f25c34610a44e68d65c9d Mon Sep 17 00:00:00 2001 From: Nick Croucher Date: Thu, 30 Nov 2023 13:42:40 +0000 Subject: [PATCH 4/5] Prioritise parallelised version of fasttree --- python/gubbins/treebuilders.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/gubbins/treebuilders.py b/python/gubbins/treebuilders.py index bd2c02d4..01546e71 100644 --- a/python/gubbins/treebuilders.py +++ b/python/gubbins/treebuilders.py @@ -141,7 +141,7 @@ def __init__(self, threads: int, bootstrap = 0, model='GTRCAT', seed = None, ver self.seed = utils.set_seed(seed) # Identify executable - self.potential_executables = ["FastTree", "fasttree"] + self.potential_executables = ["FastTreeMP","fasttreeMP","FastTree", "fasttree"] self.executable = utils.choose_executable(self.potential_executables) if self.executable is None: sys.exit("No usable version of FastTree could be found.") From b8dbdb481acf289b344c08db3003a1038b9b3edf Mon Sep 17 00:00:00 2001 From: Nick Croucher Date: Thu, 30 Nov 2023 14:58:31 +0000 Subject: [PATCH 5/5] Update installation instructions (#152) --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 1db862b6..a13e4a31 100644 --- a/README.md +++ b/README.md @@ -74,7 +74,7 @@ autoreconf -i make [sudo] make install cd python -[sudo] python3 -m pip install . +[sudo] python3 -m pip install [--prefix=$PREFIX] . ``` Use `sudo` to install Gubbins system-wide. If you don't have the permissions, run `configure` with a prefix to install Gubbins in your home directory.