Skip to content

Commit 18af663

Browse files
authored
Merge pull request #343 from andrewjpage/core_paralogs
Optionally allow paralogs in core gene alignment
2 parents f299b01 + 8d34a81 commit 18af663

10 files changed

+194
-139
lines changed

dist.ini

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
name = Bio-Roary
2-
version = 3.9.0
2+
version = 3.9.1
33
author = Andrew J. Page <ap13@sanger.ac.uk>
44
license = GPL_3
55
copyright_holder = Wellcome Trust Sanger Institute

lib/Bio/Roary.pm

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -48,6 +48,7 @@ has 'core_definition' => ( is => 'rw', isa => 'Num', default =
4848
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
4949
has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
5050
has 'inflation_value' => ( is => 'rw', isa => 'Num', default => 1.5 );
51+
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
5152

5253
has 'output_multifasta_files' => ( is => 'ro', isa => 'Bool', default => 0 );
5354

@@ -136,6 +137,7 @@ sub run {
136137
core_definition => $self->core_definition,
137138
verbose => $self->verbose,
138139
mafft => $self->mafft,
140+
allow_paralogs => $self->allow_paralogs,
139141
);
140142
$post_analysis->run();
141143

lib/Bio/Roary/CommandLine/Roary.pm

Lines changed: 7 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ has 'dont_split_groups' => ( is => 'rw', isa => 'Bool', default => 0 );
4747
has 'verbose_stats' => ( is => 'rw', isa => 'Bool', default => 0 );
4848
has 'translation_table' => ( is => 'rw', isa => 'Int', default => 11 );
4949
has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 );
50+
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
5051
has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50000 );
5152
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
5253
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
@@ -71,7 +72,7 @@ sub BUILD {
7172
$job_runner, $makeblastdb_exec, $mcxdeblast_exec, $mcl_exec, $blastp_exec,
7273
$apply_unknowns_filter, $cpus, $output_multifasta_files, $verbose_stats, $translation_table,
7374
$run_qc, $core_definition, $help, $kraken_db, $cmd_version,
74-
$mafft, $output_directory, $check_dependancies, $inflation_value,
75+
$mafft, $output_directory, $check_dependancies, $inflation_value, $allow_paralogs,
7576
);
7677

7778
GetOptionsFromArray(
@@ -98,6 +99,7 @@ sub BUILD {
9899
'cd|core_definition=f' => \$core_definition,
99100
'v|verbose' => \$verbose,
100101
'n|mafft' => \$mafft,
102+
'ap|allow_paralogs' => \$allow_paralogs,
101103
'k|kraken_db=s' => \$kraken_db,
102104
'w|version' => \$cmd_version,
103105
'a|check_dependancies' => \$check_dependancies,
@@ -302,7 +304,8 @@ sub run {
302304
core_definition => $self->core_definition,
303305
verbose => $self->verbose,
304306
mafft => $self->mafft,
305-
inflation_value => $self->inflation_value,
307+
allow_paralogs => $self->allow_paralogs,
308+
inflation_value => $self->inflation_value,
306309
);
307310
$pan_genome_obj->run();
308311

@@ -343,11 +346,12 @@ Options: -p INT number of threads [1]
343346
-r create R plots, requires R and ggplot2
344347
-s dont split paralogs
345348
-t INT translation table [11]
349+
-ap allow paralogs in core alignment
346350
-z dont delete intermediate files
347351
-v verbose output to STDOUT
348352
-w print version and exit
349353
-y add gene inference information to spreadsheet, doesnt work with -e
350-
-iv STR Change the MCL inflation value [1.5]
354+
-iv STR Change the MCL inflation value [1.5]
351355
-h this help message
352356
353357
Example: Quickly generate a core gene alignment using 8 threads

lib/Bio/Roary/CommandLine/RoaryCoreAlignment.pm

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -27,13 +27,14 @@ has 'spreadsheet_filename' => ( is => 'rw', isa => 'Str', default => 'gene_
2727
has 'output_filename' => ( is => 'rw', isa => 'Str', default => 'core_gene_alignment.aln' );
2828
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
2929
has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
30+
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
3031
has '_error_message' => ( is => 'rw', isa => 'Str' );
3132
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
3233

3334
sub BUILD {
3435
my ($self) = @_;
3536

36-
my ( $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition,$verbose, $help, $mafft, $dont_delete_files );
37+
my ( $multifasta_base_directory, $spreadsheet_filename, $output_filename, $core_definition,$verbose, $help, $mafft, $allow_paralogs, $dont_delete_files );
3738

3839
GetOptionsFromArray(
3940
$self->args,
@@ -42,6 +43,7 @@ sub BUILD {
4243
'o|output_filename=s' => \$output_filename,
4344
'cd|core_definition=f' => \$core_definition,
4445
'z|dont_delete_files' => \$dont_delete_files,
46+
'p|allow_paralogs' => \$allow_paralogs,
4547
'v|verbose' => \$verbose,
4648
'h|help' => \$help,
4749
);
@@ -51,6 +53,7 @@ sub BUILD {
5153
$self->logger->level(10000);
5254
}
5355
$self->help($help) if(defined($help));
56+
$self->allow_paralogs($allow_paralogs) if(defined($allow_paralogs));
5457

5558
if ( defined($multifasta_base_directory) && ( -d $multifasta_base_directory ) ) {
5659
$self->multifasta_base_directory( abs_path($multifasta_base_directory));
@@ -95,7 +98,8 @@ sub run {
9598
$self->logger->info("Extract core genes from spreadsheet");
9699
my $core_genes_obj = Bio::Roary::ExtractCoreGenesFromSpreadsheet->new(
97100
spreadsheet => $self->spreadsheet_filename,
98-
core_definition => $self->core_definition
101+
core_definition => $self->core_definition,
102+
allow_paralogs => $self->allow_paralogs
99103
);
100104

101105
$self->logger->info("Looking up genes in files");
@@ -130,6 +134,7 @@ Options: -o STR output filename [core_gene_alignment.aln]
130134
-cd FLOAT percentage of isolates a gene must be in to be core [99]
131135
-m STR directory containing gene multi-FASTAs [pan_genome_sequences]
132136
-s STR gene presence and absence spreadsheet [gene_presence_absence.csv]
137+
-p allow paralogs
133138
-z dont delete intermediate files
134139
-v verbose output to STDOUT
135140
-h this help message

lib/Bio/Roary/CommandLine/RoaryPostAnalysis.pm

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -41,14 +41,15 @@ has 'group_limit' => ( is => 'rw', isa => 'Num', default => 500
4141
has 'core_definition' => ( is => 'rw', isa => 'Num', default => 0.99 );
4242
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
4343
has 'mafft' => ( is => 'rw', isa => 'Bool', default => 0 );
44+
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
4445

4546
sub BUILD {
4647
my ($self) = @_;
4748

4849
my (
4950
$output_filename, $dont_create_rplots, $dont_delete_files, $dont_split_groups, $output_pan_geneome_filename,
5051
$job_runner, $output_statistics_filename, $output_multifasta_files, $clusters_filename, $core_definition,
51-
$fasta_files, $input_files, $verbose_stats, $translation_table, $help, $cpus,$group_limit,$verbose,$mafft
52+
$fasta_files, $input_files, $verbose_stats, $translation_table, $help, $cpus,$group_limit,$verbose,$mafft, $allow_paralogs
5253
);
5354

5455

@@ -72,6 +73,7 @@ sub BUILD {
7273
'cd|core_definition=f' => \$core_definition,
7374
'v|verbose' => \$verbose,
7475
'n|mafft' => \$mafft,
76+
'q|allow_paralogs' => \$allow_paralogs,
7577
'h|help' => \$help,
7678
);
7779

@@ -93,6 +95,7 @@ sub BUILD {
9395
$self->group_limit($group_limit) if ( defined($group_limit) );
9496
$self->core_definition( $core_definition/100 ) if ( defined($core_definition) );
9597
$self->mafft($mafft) if ( defined($mafft) );
98+
$self->allow_paralogs($allow_paralogs) if ( defined($allow_paralogs) );
9699
if ( defined($verbose) ) {
97100
$self->verbose($verbose);
98101
$self->logger->level(10000);
@@ -158,6 +161,7 @@ sub run {
158161
cpus => $self->cpus,
159162
verbose => $self->verbose,
160163
mafft => $self->mafft,
164+
allow_paralogs => $self->allow_paralogs,
161165
dont_delete_files => $self->dont_delete_files,
162166
num_input_files => $#{$input_files},
163167
);
@@ -222,6 +226,7 @@ Options: -a dont delete intermediate files
222226
-n fast core gene alignement with MAFFT instead of PRANK
223227
-o STR clusters output filename [clustered_proteins]
224228
-p STR output pan genome filename [pan_genome.fa]
229+
-q allow paralogs in core alignment
225230
-s STR output gene presence and absence filename [gene_presence_absence.csv]
226231
-t INT translation table [11]
227232
-z INT number of threads [1]

lib/Bio/Roary/External/GeneAlignmentFromNucleotides.pm

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -29,6 +29,7 @@ has 'translation_table' => ( is => 'rw', isa => 'Int', default =>
2929
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1 );
3030
has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
3131
has 'dont_delete_files' => ( is => 'rw', isa => 'Bool', default => 0 );
32+
has 'allow_paralogs' => ( is => 'rw', isa => 'Bool', default => 0 );
3233
has 'num_input_files' => ( is => 'ro', isa => 'Int', required => 1);
3334

3435
# Overload Role
@@ -85,6 +86,7 @@ sub _build__core_alignment_cmd {
8586
my $core_cmd = "pan_genome_core_alignment";
8687
$core_cmd .= " -cd " . ($self->core_definition*100) if ( defined $self->core_definition );
8788
$core_cmd .= " --dont_delete_files " if ( defined $self->dont_delete_files && $self->dont_delete_files == 1 );
89+
$core_cmd .= " --allow_paralogs " if ( defined $self->allow_paralogs && $self->allow_paralogs == 1 );
8890

8991
return $core_cmd;
9092
}

lib/Bio/Roary/External/PostAnalysis.pm

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,7 @@ has 'group_limit' => ( is => 'rw', isa => 'Num', default => 50
3737
has 'core_definition' => ( is => 'ro', isa => 'Num', default => 1.0 );
3838
has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
3939
has 'mafft' => ( is => 'ro', isa => 'Bool', default => 0 );
40+
has 'allow_paralogs' => ( is => 'ro', isa => 'Bool', default => 0 );
4041
has '_working_directory' => ( is => 'ro', isa => 'File::Temp::Dir', default => sub { File::Temp->newdir( DIR => getcwd, CLEANUP => 1 ); } );
4142
has '_gff_fofn' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__gff_fofn' );
4243
has '_fasta_fofn' => ( is => 'ro', isa => 'Str', lazy => 1, builder => '_build__fasta_fofn' );
@@ -137,6 +138,9 @@ sub _command_to_run {
137138

138139
my $verbose_flag = '';
139140
$verbose_flag = '-v' if ( defined($self->verbose) && $self->verbose == 1 );
141+
142+
my $allow_paralogs_flag = '';
143+
$allow_paralogs_flag = '--allow_paralogs' if ( defined($self->allow_paralogs) && $self->allow_paralogs == 1 );
140144

141145
return join(
142146
" ",
@@ -156,6 +160,7 @@ sub _command_to_run {
156160
$verbose_stats_flag,
157161
$verbose_flag,
158162
$mafft_flag,
163+
$allow_paralogs_flag,
159164
'-j', $self->job_runner,
160165
'--processors', $self->cpus,
161166
'--group_limit', $self->group_limit,

0 commit comments

Comments
 (0)