From 06cf687587e46813aa38e81095b87929005c187e Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 18 Dec 2023 12:07:03 +1100 Subject: [PATCH] Indices: Ensure branches are non-negative for most phylo calculations Exceptions are those that return information about the tree, e.g. how many and which branches match the basedata. --- lib/Biodiverse/Indices/PhyloCom.pm | 4 ++ lib/Biodiverse/Indices/Phylogenetic.pm | 24 +++++++++--- lib/Biodiverse/Tree.pm | 14 +++++++ t/24-Indices-phylogenetic.t | 54 ++++++++++++++++++++++++++ 4 files changed, 91 insertions(+), 5 deletions(-) diff --git a/lib/Biodiverse/Indices/PhyloCom.pm b/lib/Biodiverse/Indices/PhyloCom.pm index 865321f82..429a266b5 100644 --- a/lib/Biodiverse/Indices/PhyloCom.pm +++ b/lib/Biodiverse/Indices/PhyloCom.pm @@ -166,6 +166,7 @@ sub get_metadata_calc_phylo_mpd_mntd1 { . 'along the tree. Compares with ' . 'all other labels across both neighbour sets. ', name => 'Phylogenetic and Nearest taxon distances, unweighted', + pre_conditions => ['tree_branches_are_nonnegative'], %$submeta, ); @@ -200,6 +201,7 @@ sub get_metadata_calc_phylo_mpd_mntd2 { . 'all other labels across both neighbour sets. ' . 'Weighted by sample counts', name => 'Phylogenetic and Nearest taxon distances, local range weighted', + pre_conditions => ['tree_branches_are_nonnegative'], %$submeta, ); @@ -234,6 +236,7 @@ sub get_metadata_calc_phylo_mpd_mntd3 { . 'all other labels across both neighbour sets. ' . 'Weighted by sample counts (which currently must be integers)', name => 'Phylogenetic and Nearest taxon distances, abundance weighted', + pre_conditions => ['tree_branches_are_nonnegative'], %$submeta, ); @@ -1069,6 +1072,7 @@ sub get_metadata__calc_nri_nti_expected_values { pre_calc_global => $pre_calc_global, required_args => 'tree_ref', uses_nbr_lists => 1, + pre_conditions => ['tree_branches_are_nonnegative'], ); return $metadata_class->new(\%metadata); diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index cb806619a..46fa697b4 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -432,6 +432,7 @@ sub get_metadata__calc_pd { pre_calc_global => [qw /get_path_length_cache set_path_length_cache_by_group_flag/], uses_nbr_lists => 1, # how many lists it must have required_args => {'tree_ref' => 1}, + pre_conditions => ['tree_branches_are_nonnegative'], ); return $metadata_class->new(\%metadata); @@ -717,6 +718,7 @@ sub get_metadata_calc_pe { distribution => 'unit_interval', }, }, + pre_conditions => ['tree_branches_are_nonnegative'], ); return $metadata_class->new(\%metadata); @@ -2041,14 +2043,15 @@ sub get_node_abundance_global { sub get_metadata_get_trimmed_tree { my %metadata = ( - name => 'get_trimmed_tree', - description => 'Get a version of the tree trimmed to contain only labels in the basedata', - required_args => 'tree_ref', - indices => { + name => 'get_trimmed_tree', + description => 'Get a version of the tree trimmed to contain only labels in the basedata', + required_args => 'tree_ref', + indices => { trimmed_tree => { description => 'Trimmed tree', }, }, + pre_conditions => ['tree_branches_are_nonnegative'], ); return $metadata_class->new(\%metadata); } @@ -2412,7 +2415,8 @@ sub get_metadata_calc_phylo_jaccard { cluster_can_lump_zeroes => 1, } }, - required_args => {'tree_ref' => 1} + required_args => {'tree_ref' => 1}, + pre_conditions => ['tree_branches_are_nonnegative'], ); return $metadata_class->new(\%metadata); @@ -2613,6 +2617,7 @@ sub get_metadata__calc_phylo_abc_lists { pre_calc_global => [qw /get_trimmed_tree get_path_length_cache set_path_length_cache_by_group_flag/], uses_nbr_lists => 1, # how many sets of lists it must have required_args => {tree_ref => 1}, + pre_conditions => ['tree_branches_are_nonnegative'], ); return $metadata_class->new(\%metadata); @@ -3163,6 +3168,15 @@ sub calc_phylo_abundance { return wantarray ? %results : \%results; } + +sub tree_branches_are_nonnegative { + my ($self, %args) = @_; + + my $tree_ref = $args{tree_ref} or croak 'tree_ref arg not passsed'; + + $tree_ref->branches_are_nonnegative; +} + 1; diff --git a/lib/Biodiverse/Tree.pm b/lib/Biodiverse/Tree.pm index 498113140..c8fac7cd6 100644 --- a/lib/Biodiverse/Tree.pm +++ b/lib/Biodiverse/Tree.pm @@ -3736,6 +3736,20 @@ sub get_mean_nearest_neighbour_distance { return $mean; } +sub branches_are_nonnegative { + my $self = shift; + + state $cache_key = 'BRANCHES_ARE_NONNEGATIVE'; + my $non_neg = $self->get_cached_value ($cache_key); + + return $non_neg if defined $non_neg; + + $non_neg = List::Util::all {$_->get_length >= 0} $self->get_node_refs; + $non_neg //= 0; + $self->set_cached_value($cache_key => $non_neg); + + return $non_neg; +} 1; diff --git a/t/24-Indices-phylogenetic.t b/t/24-Indices-phylogenetic.t index 4313794ce..5dae4d6cc 100644 --- a/t/24-Indices-phylogenetic.t +++ b/t/24-Indices-phylogenetic.t @@ -508,6 +508,60 @@ sub test_pe_with_extra_nodes_in_tree { } +# there should be no valid phylo calcs with neg branches +sub test_neg_branch_lengths { + # use Data::Printer; + + my $bd = Biodiverse::BaseData->new( + CELL_SIZES => [ 2, 2 ], + NAME => 'Neg branch length', + ); + $bd->add_element (group => '1:1', label => 'a'); + + my $tree = get_tree_object(); + my @nodes = $tree->get_node_refs; + foreach my $node (@nodes) { + $node->set_length_aa(1); + } + $nodes[0]->set_length_aa (-1); + + my $indices = Biodiverse::Indices->new(BASEDATA_REF => $bd); + + my $calcs = $indices->get_calculations; + my @phylo_calcs; + foreach my $type (sort keys %$calcs) { + next if not $type =~ /^Phylo/; + push @phylo_calcs, sort @{$calcs->{$type}}; + } + + # It is clunky that we do this here + my $re1 = qr/calc(?:_count)?_labels(?:_not)?_on(?:_trimmed)?_tree/; + my $re2 = qr/calc_last_shared_ancestor_props/; + @phylo_calcs = grep {not $_ =~ /$re1|$re2/} @phylo_calcs; + + my $valid_calcs = eval { + $indices->get_valid_calculations( + calculations => \@phylo_calcs, + nbr_list_count => 2, + element_list1 => [], # for validity checking only + element_list2 => [], + tree_ref => $tree, + processing_element => 'x', + ); + }; + my $e = $@; + note $e if $e; + ok (!$e, "Obtained valid calcs without eval error"); + is $indices->get_valid_calculation_count, 0, + "no valid phylo calcs when there are negative branch lengths"; + + # my @keys = sort keys %{$valid_calcs->{calculations_to_run}}; + # p @keys; + # my $to_clear = $indices->get_invalid_calculations; + # p $to_clear; +} + + done_testing; 1;