diff --git a/lib/Biodiverse/Common.pm b/lib/Biodiverse/Common.pm index feb2d0f38..fb6fa0698 100644 --- a/lib/Biodiverse/Common.pm +++ b/lib/Biodiverse/Common.pm @@ -447,16 +447,7 @@ sub weaken_param { sub delete_params { my $self = shift; - my $count = 0; - foreach my $param (@_) { # should only delete those that exist... - if (delete $self->{PARAMS}{$param}) { - $count ++; - print "Deleted parameter $param from $self\n" - if $self->get_param('PARAM_CHANGE_WARN'); - } - } # inefficient, as we could use a hash slice to do all in one hit, but allows better feedback - - return $count; + scalar delete @{$self->{PARAMS}}{@_}; } # an internal apocalyptic sub. use only for destroy methods diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index 84023aee5..75f3ba5b8 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -1703,22 +1703,13 @@ sub _calc_abc_any { my $self = shift; my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); - my $cache_key - = List::Util::first {defined $cache_hash->{$_}} - (qw/calc_abc calc_abc2 calc_abc3/); - # say STDERR 'NO previous cache key' - # if !$cache_key; + my $results = $cache_hash->{calc_abc} + // $cache_hash->{calc_abc2} + // $cache_hash->{calc_abc3} + // $self->calc_abc(@_); - # fall back to calc_abc if nothing had an explicit abc dependency - my $cached = $cache_key - ? $cache_hash->{$cache_key} - : $self->calc_abc(@_); - - croak 'No previous calc_abc results found' - if !$cached; - - return wantarray ? %$cached : $cached; + return wantarray ? %$results : $results; } sub get_metadata_calc_abc { @@ -1813,20 +1804,22 @@ sub _calc_abc_dispatcher { // $args{label_list2} ); + state $empty_array = []; + return $self->_calc_abc_pairwise_mode(%args) - if $self->get_pairwise_mode - && @{$args{element_list1} // []} == 1 - && @{$args{element_list2} // []} == 1 - && !$have_lb_lists; + if @{$args{element_list1} // $empty_array} == 1 + && @{$args{element_list2} // $empty_array} == 1 + && $self->get_pairwise_mode + && !$have_lb_lists; return $self->_calc_abc_hierarchical_mode(%args) if $args{current_node_details} - && !$have_lb_lists - && $self->get_hierarchical_mode; + && !$have_lb_lists + && $self->get_hierarchical_mode; return $self->_calc_abc(%args) if is_hashref($args{element_list1}) - || @{$args{element_list1} // []} != 1 + || @{$args{element_list1} // $empty_array} != 1 || defined $args{element_list2} || $have_lb_lists; diff --git a/lib/Biodiverse/Indices/PhyloCom.pm b/lib/Biodiverse/Indices/PhyloCom.pm index 67f0b6414..80632aa21 100644 --- a/lib/Biodiverse/Indices/PhyloCom.pm +++ b/lib/Biodiverse/Indices/PhyloCom.pm @@ -347,7 +347,7 @@ sub _calc_phylo_mpd_mntd { # Save some cycles if all the weights are the same. # If we ever implement dissim then we can also check label_hash2. if ($use_wts && $label_hashrefs_are_same) { - if (not List::Util::any {$_ != 1} values %$label_hash1) { + if (List::Util::all {$_ == 1} values %$label_hash1) { $use_wts = undef; } } diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm index 94b0d4050..8340cf694 100644 --- a/lib/Biodiverse/Indices/Phylogenetic.pm +++ b/lib/Biodiverse/Indices/Phylogenetic.pm @@ -664,10 +664,18 @@ sub get_path_lengths_to_root_node { # but first option is faster still my $len_hash = $tree_ref->get_node_length_hash; if (HAVE_BD_UTILS_108) { - # get keys and vals in one call - Biodiverse::Utils::XS::add_hash_keys_and_vals_until_exists_AoA ( - $path_hash, \@collected_paths, $len_hash, - ); + # sometimes there are no paths + if (@collected_paths) { + # direct assign the first (could do first few, or the longest but that needs another scan) + my $first = shift @collected_paths; + @$path_hash{@$first} = @$len_hash{@$first}; + if (@collected_paths) { + # get keys and vals in one call + Biodiverse::Utils::XS::add_hash_keys_and_vals_until_exists_AoA( + $path_hash, \@collected_paths, $len_hash, + ); + } + } } elsif (HAVE_BD_UTILS) { Biodiverse::Utils::copy_values_from ($path_hash, $len_hash); @@ -772,7 +780,8 @@ sub get_metadata_calc_pe_lists { name => 'Phylogenetic Endemism lists', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe'], + pre_calc => [qw /_calc_pe_lists/], + pre_calc_global => [ 'get_node_range_hash' ], uses_nbr_lists => 1, distribution => 'nonnegative', indices => { @@ -800,6 +809,12 @@ sub calc_pe_lists { my @keys = qw /PE_WTLIST PE_RANGELIST PE_LOCAL_RANGELIST/; my %results = %args{@keys}; + # PE_RANGELIST used to be done in _calc_pe + if (!$results{PE_RANGELIST}) { + \my %ranges = $args{node_range}; + my %h = %ranges{keys %{$results{PE_WTLIST}}}; + $results{PE_RANGELIST} = \%h; + } return wantarray ? %results : \%results; } @@ -830,7 +845,7 @@ END_PEC_DESC name => 'Phylogenetic Endemism central', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => [qw /_calc_pe _calc_phylo_abc_lists/], + pre_calc => [qw /_calc_pe _calc_pe_lists _calc_phylo_abc_lists/], pre_calc_global => [qw /get_trimmed_tree/], uses_nbr_lists => 1, # how many lists it must have formula => $formula, @@ -888,7 +903,7 @@ END_PEC_DESC name => 'Phylogenetic Endemism central lists', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => [qw /_calc_pe _calc_phylo_abc_lists/], + pre_calc => [qw /_calc_pe calc_pe_lists _calc_phylo_abc_lists/], uses_nbr_lists => 1, # how many lists it must have distribution => 'nonnegative', indices => { @@ -1100,7 +1115,7 @@ sub get_metadata_calc_pe_clade_contributions { name => 'PE clade contributions', reference => '', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe', 'get_sub_tree_as_hash'], + pre_calc => [qw/_calc_pe _calc_pe_lists get_sub_tree_as_hash/], pre_calc_global => ['get_trimmed_tree'], uses_nbr_lists => 1, distribution => 'nonnegative', # default @@ -1417,7 +1432,7 @@ EOD name => 'Phylogenetic Endemism single', reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', type => 'Phylogenetic Endemism Indices', - pre_calc => ['_calc_pe'], + pre_calc => [qw/_calc_pe calc_pe_lists/], pre_calc_global => ['get_trimmed_tree'], uses_nbr_lists => 1, indices => { @@ -1768,14 +1783,14 @@ sub get_inverse_range_weighted_path_lengths { my %args = @_; my $tree = $args{tree_ref}; - my $node_ranges = $args{node_range}; - + \my %node_ranges = $args{node_range}; + \my %node_length_hash = $tree->get_node_length_hash; + my %range_weighted; - - foreach my $node ($tree->get_node_refs) { - my $name = $node->get_name; - next if !$node_ranges->{$name}; - $range_weighted{$name} = $node->get_length / $node_ranges->{$name}; + + foreach my $name (keys %node_length_hash) { + next if !$node_ranges{$name}; + $range_weighted{$name} = $node_length_hash{$name} / $node_ranges{$name}; } my %results = (inverse_range_weighted_node_lengths => \%range_weighted); @@ -2653,7 +2668,7 @@ sub get_metadata__calc_phylo_abc_lists { name => 'Phylogenetic ABC lists', description => 'Calculate the sets of shared and not shared branches between two sets of labels', type => 'Phylogenetic Indices', - pre_calc => 'calc_abc', + pre_calc => '_calc_abc_any', pre_calc_global => [qw /get_trimmed_tree get_path_length_cache set_path_length_cache_by_group_flag/], uses_nbr_lists => 1, # how many sets of lists it must have required_args => {tree_ref => 1}, @@ -2679,7 +2694,9 @@ sub _calc_phylo_abc_lists { el_list => $args{element_list1}, ); - if (!@{$args{element_list2}}) { + # $args{C} is the count of labels unique to set 2 + # so if it is zero then we can short-circuit. + if ($args{C} == 0) { my $res = { PHYLO_A_LIST => {}, PHYLO_B_LIST => $nodes_in_path1, @@ -2688,14 +2705,12 @@ sub _calc_phylo_abc_lists { return wantarray ? %$res : $res; } - my $nodes_in_path2 = @{$args{element_list2}} - ? $self->get_path_lengths_to_root_node ( - %args, - labels => $label_hash2, - tree_ref => $tree, - el_list => $args{element_list2}, - ) - : {}; + my $nodes_in_path2 = $self->get_path_lengths_to_root_node ( + %args, + labels => $label_hash2, + tree_ref => $tree, + el_list => $args{element_list2}, + ); my %results; # one day we can clean this all up diff --git a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm index c4f42c8a3..e51ed4af0 100644 --- a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm +++ b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm @@ -12,6 +12,8 @@ no warnings 'experimental::refaliasing'; use List::Util qw /sum/; +my $metadata_class = 'Biodiverse::Metadata::Indices'; + sub _calc_pe { my $self = shift; my %args = @_; @@ -25,7 +27,6 @@ sub _calc_pe { my $tree_ref = $args{trimmed_tree}; my $results_cache = $args{PE_RESULTS_CACHE}; - \my %node_ranges = $args{node_range}; \my %rw_node_lengths = $args{inverse_range_weighted_node_lengths}; my $bd = $args{basedata_ref} || $self->get_basedata_ref; @@ -33,21 +34,7 @@ sub _calc_pe { # default these to undef - more meaningful than zero my ($PE_WE, $PE_WE_P); - my (%wts, %local_ranges, %results); - - # prob a micro-optimisation, but might avoid - # some looping below when collating weights - # and one group has many more labels than the other - if (@$element_list_all == 2) { - my $count0 = $bd->get_richness_aa ($element_list_all->[0]); - my $count1 = $bd->get_richness_aa ($element_list_all->[1]); - if ($count1 > $count0) { - $element_list_all = [ - $element_list_all->[1], - $element_list_all->[0], - ]; - } - } + my %results; foreach my $group (@$element_list_all) { my $results_this_gp; @@ -102,32 +89,6 @@ sub _calc_pe { $PE_WE += $results_this_gp->{PE_WE}; } - # Avoid some redundant slicing and dicing when we have only one group - # Pays off when processing large data sets - if (scalar @$element_list_all == 1) { - # no need to collate anything so make a shallow copy - @results{keys %$results_this_gp} = values %$results_this_gp; - # but we do need to add to the local range hash - my $hashref = $results_this_gp->{PE_WTLIST}; - @local_ranges{keys %$hashref} = (1) x scalar keys %$hashref; - } - else { - # refalias might be a nano-optimisation here... - \my %wt_hash = $results_this_gp->{PE_WTLIST}; - - # weights need to be summed, - # unless we are starting from a blank slate - if (keys %wts) { - foreach my $node (keys %wt_hash) { - $wts{$node} += $wt_hash{$node}; - $local_ranges{$node}++; - } - } - else { - %wts = %wt_hash; - @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash; - } - } } { @@ -138,21 +99,8 @@ sub _calc_pe { $PE_WE_P = eval {$PE_WE / $total_tree_length}; } - # need the collated versions for multiple elements - if (scalar @$element_list_all > 1) { - $results{PE_WE} = $PE_WE; - $results{PE_WTLIST} = \%wts; - my %nranges = %node_ranges{keys %wts}; - $results{PE_RANGELIST} = \%nranges; - } - else { - my %nranges = %node_ranges{keys %{$results{PE_WTLIST}}}; - $results{PE_RANGELIST} = \%nranges; - } - - # need to set these + $results{PE_WE} = $PE_WE; $results{PE_WE_P} = $PE_WE_P; - $results{PE_LOCAL_RANGELIST} = \%local_ranges; return wantarray ? %results : \%results; } @@ -162,8 +110,6 @@ sub _calc_pe { sub _calc_pe_hierarchical { my ($self, %args) = @_; - my $element_list_all = $args{element_list_all}; - my $node_data = $args{current_node_details} // croak 'Must pass the current node details when in hierarchical mode'; my $node_name = $node_data->{name} @@ -172,12 +118,11 @@ sub _calc_pe_hierarchical { my $tree_ref = $args{trimmed_tree}; my $results_cache = $args{PE_RESULTS_CACHE}; - \my %node_ranges = $args{node_range}; # default these to undef - more meaningful than zero my ($PE_WE, $PE_WE_P); - my (%wts, %local_ranges, %results); + my %results; foreach my $group (@$child_names) { my $results_this_gp; @@ -196,6 +141,151 @@ sub _calc_pe_hierarchical { if (defined $results_this_gp->{PE_WE}) { $PE_WE += $results_this_gp->{PE_WE}; } + } + + { + no warnings 'uninitialized'; + my $total_tree_length = $tree_ref->get_total_tree_length; + + #Phylogenetic endemism = sum for all nodes of: + # (branch length/total tree length) / node range + $PE_WE_P = eval {$PE_WE / $total_tree_length}; + } + + $results{PE_WE} = $PE_WE; + $results{PE_WE_P} = $PE_WE_P; + + $results_cache->{$node_name}{PE_WE} = $results{PE_WE}; + + return wantarray ? %results : \%results; +} + +sub get_metadata__calc_pe_lists { + + my %metadata = ( + description => 'Phylogenetic endemism (PE) base lists.', + name => 'Phylogenetic Endemism base lists', + reference => 'Rosauer et al (2009) Mol. Ecol. https://doi.org/10.1111/j.1365-294X.2009.04311.x', + type => 'Phylogenetic Endemism Indices', # keeps it clear of the other indices in the GUI + pre_calc_global => [ qw / + get_node_range_hash + get_trimmed_tree + get_pe_element_cache + get_path_length_cache + set_path_length_cache_by_group_flag + get_inverse_range_weighted_path_lengths + /], + pre_calc => [qw/_calc_abc_any _calc_pe/], + uses_nbr_lists => 1, # how many lists it must have + required_args => {'tree_ref' => 1}, + ); + + return $metadata_class->new(\%metadata); +} + +sub _calc_pe_lists { + my $self = shift; + my %args = @_; + + my $element_list_all = $args{element_list_all}; + + return $self->_calc_pe_lists_hierarchical(%args) + if defined $args{current_node_details} + && $self->get_hierarchical_mode + && @$element_list_all > 1; + + my $results_cache = $args{PE_RESULTS_CACHE}; + \my %rw_node_lengths = $args{inverse_range_weighted_node_lengths}; + + my $bd = $args{basedata_ref} || $self->get_basedata_ref; + + my $el_count = @$element_list_all; + + # prob a micro-optimisation, but might avoid + # some looping below when collating weights + # and one group has many more labels than the other + if ($el_count == 2) { + my $count0 = $bd->get_richness_aa ($element_list_all->[0]); + my $count1 = $bd->get_richness_aa ($element_list_all->[1]); + if ($count1 > $count0) { + $element_list_all = [ + $element_list_all->[1], + $element_list_all->[0], + ]; + } + } + + my (%wts, %local_ranges, %results); + + foreach my $group (@$element_list_all) { + # this is populated by _calc_pe under the calc dependency system + my $results_this_gp = $results_cache->{$group}; + # but can be called directly so need to handle that and populate the cache + if (!exists $results_this_gp->{PE_WTLIST}) { + $self->_calc_pe(%args); + $results_this_gp = $results_cache->{$group}; + } + + # Avoid some redundant slicing and dicing when we have only one group + # Pays off when processing large data sets + if ($el_count == 1) { + # no need to collate anything so make a shallow copy + $results{PE_WTLIST} = $results_this_gp->{PE_WTLIST}; + # but we do need to add to the local range hash + my $hashref = $results_this_gp->{PE_WTLIST}; + @local_ranges{keys %$hashref} = (1) x scalar keys %$hashref; + } + else { + # refalias might be a nano-optimisation here... + \my %wt_hash = $results_this_gp->{PE_WTLIST}; + + # Local ranges need to be summed unless + # we are starting from a blank slate. + # Weights are aggregated later. + if (keys %local_ranges) { + # postfix for speed + $local_ranges{$_}++ + foreach keys %wt_hash; + } + else { + @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash; + } + } + } + + # collate + $wts{$_} = $rw_node_lengths{$_} * $local_ranges{$_} + for keys %local_ranges; + $results{PE_WTLIST} = \%wts; + + $results{PE_LOCAL_RANGELIST} = \%local_ranges; + + return wantarray ? %results : \%results; +} + +sub _calc_pe_lists_hierarchical { + my ($self, %args) = @_; + + my $element_list_all = $args{element_list_all}; + + my $node_data = $args{current_node_details} + // croak 'Must pass the current node details when in hierarchical mode'; + my $node_name = $node_data->{name} + // croak 'Missing current node name in hierarchical mode'; + my $child_names = $node_data->{child_names}; + + my $results_cache = $args{PE_RESULTS_CACHE}; + + my (%wts, %local_ranges, %results); + + foreach my $group (@$child_names) { + # this is populated by _calc_pe under the calc dependency system + my $results_this_gp = $results_cache->{$group}; + # but can be called directly so need to handle that and populate the cache + if (!exists $results_this_gp->{PE_WTLIST}) { + $self->_calc_pe(%args); + $results_this_gp = $results_cache->{$group}; + } # Avoid some redundant slicing and dicing when we have only one group # Pays off when processing large data sets @@ -215,42 +305,23 @@ sub _calc_pe_hierarchical { if (keys %wts) { foreach my $node (keys %wt_hash) { $wts{$node} += $wt_hash{$node}; - $local_ranges{$node}++; } } else { %wts = %wt_hash; - @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash; } - } - } - { - no warnings 'uninitialized'; - my $total_tree_length = $tree_ref->get_total_tree_length; - - #Phylogenetic endemism = sum for all nodes of: - # (branch length/total tree length) / node range - $PE_WE_P = eval {$PE_WE / $total_tree_length}; - } - - # need the collated versions for multiple elements - if (scalar @$element_list_all > 1) { - $results{PE_WE} = $PE_WE; - $results{PE_WTLIST} = \%wts; - my %nranges = %node_ranges{keys %wts}; - $results{PE_RANGELIST} = \%nranges; - } - else { - my %nranges = %node_ranges{keys %{$results{PE_WTLIST}}}; - $results{PE_RANGELIST} = \%nranges; + my $cached_ranges = $results_this_gp->{PE_LOCAL_RANGELIST}; + $local_ranges{$_} += $cached_ranges->{$_} // 1 + foreach keys %wt_hash; + } } - # need to set these - $results{PE_WE_P} = $PE_WE_P; + $results{PE_WTLIST} = \%wts; $results{PE_LOCAL_RANGELIST} = \%local_ranges; - $results_cache->{$node_name} = {%results{qw/PE_WE PE_WTLIST/}}; + $results_cache->{$node_name}{PE_WTLIST} = \%wts; + $results_cache->{$node_name}{PE_LOCAL_RANGELIST} = \%local_ranges; return wantarray ? %results : \%results; } diff --git a/lib/Biodiverse/Indices/PhylogeneticRelative.pm b/lib/Biodiverse/Indices/PhylogeneticRelative.pm index 7513da897..90d0a0cf9 100644 --- a/lib/Biodiverse/Indices/PhylogeneticRelative.pm +++ b/lib/Biodiverse/Indices/PhylogeneticRelative.pm @@ -3,6 +3,8 @@ use 5.022; use strict; use warnings; +use experimental qw/refaliasing/; + use English qw /-no_match_vars/; use List::Util qw /sum first/; @@ -236,11 +238,14 @@ sub get_metadata_calc_phylo_rpe_central { name => 'Relative Phylogenetic Endemism, central', reference => 'Mishler et al. (2014) https://doi.org/10.1038/ncomms5473', type => 'Phylogenetic Indices (relative)', - pre_calc => [qw /calc_pe_central calc_pe_central_lists calc_elements_used/], + pre_calc => [qw /calc_pe_central calc_pe_central_lists calc_elements_used _calc_abc_any/], pre_calc_global => [qw / get_trimmed_tree get_trimmed_tree_with_equalised_branch_lengths get_trimmed_tree_eq_branch_lengths_node_length_hash + get_trimmed_tree_range_inverse_hash_nonzero_len + get_pe_element_cache + get_rpe_element_cache /], uses_nbr_lists => 1, indices => { @@ -266,33 +271,74 @@ sub calc_phylo_rpe_central { my $self = shift; my %args = @_; - my $results; - - if (!@{$args{element_list2} // []}) { + if (!@{$args{element_list2} // []} || !($args{C} // 1)) { # We just copy the calc_phylo_rpe2 results - # if there are no nbrs in set2 + # if there are no nbrs in set2 or all the + # labels are common my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); - $results = $cache_hash->{calc_phylo_rpe2}; + my $results = $cache_hash->{calc_phylo_rpe2} + // $self->calc_phylo_rpe2( + %args, + PE_WE_P => $args{PEC_WE_P}, + PE_WE => $args{PEC_WE}, + PE_LOCAL_RANGELIST => $args{PEC_LOCAL_RANGELIST}, + ); + + my %results2; + foreach my $key (keys %$results) { + # will need to be changed if we rename the RPE indices + my $new_key = ($key =~ s/2$/C/r); + $results2{$new_key} = $results->{$key}; + } + + return wantarray ? %results2 : \%results2; } - if (!$results) { - $results = $self->calc_phylo_rpe2( - %args, - PE_WE_P => $args{PEC_WE_P}, - PE_WE => $args{PEC_WE}, - PE_RANGELIST => $args{PEC_RANGELIST}, - PE_LOCAL_RANGELIST => $args{PEC_LOCAL_RANGELIST}, - ); + my $pe_p_score = $args{PEC_WE_P}; + + my $orig_tree_ref = $args{trimmed_tree}; + my $orig_total_tree_length = $orig_tree_ref->get_total_tree_length; + + my $null_tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED}; + my $null_total_tree_length = $null_tree_ref->get_total_tree_length; + + my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; + \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; + + # Get the PE score assuming equal branch lengths + my ($pe_null, $null, $phylo_rpe2, $diff); + + # need to work over the lists + \my %node_ranges_local = $args{PEC_LOCAL_RANGELIST}; + + # First condition optimises for the common case where all local ranges are 1 + if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1) { + $pe_null += $_ foreach @range_inverse{keys %node_ranges_local}; + } + else { + # postfix for speed + $pe_null + += $range_inverse{$_} + * $node_ranges_local{$_} + foreach keys %node_ranges_local; } + $pe_null *= $default_eq_len if $pe_null; - my %results2; - foreach my $key (keys %$results) { - # will need to be changed if we rename the RPE indices - my $new_key = ($key =~ s/2$/C/r); - $results2{$new_key} = $results->{$key}; + { + no warnings qw /numeric uninitialized/; + # null is equiv to PE_WE_P for the equalised tree + $null = eval {$pe_null / $null_total_tree_length}; + $phylo_rpe2 = eval {$pe_p_score / $null}; + $diff = eval {$orig_total_tree_length * ($pe_p_score - $null)}; } - return wantarray ? %results2 : \%results2; + my $results = { + PHYLO_RPEC => $phylo_rpe2, + PHYLO_RPE_NULLC => $null, + PHYLO_RPE_DIFFC => $diff, + }; + + return wantarray ? %$results : $results; } @@ -306,11 +352,14 @@ sub get_metadata_calc_phylo_rpe2 { name => 'Relative Phylogenetic Endemism, type 2', reference => 'Mishler et al. (2014) https://doi.org/10.1038/ncomms5473', type => 'Phylogenetic Indices (relative)', - pre_calc => [qw /calc_pe calc_pe_lists calc_elements_used/], + pre_calc => [qw /_calc_pe calc_elements_used _calc_abc_any/], pre_calc_global => [qw / get_trimmed_tree get_trimmed_tree_with_equalised_branch_lengths get_trimmed_tree_eq_branch_lengths_node_length_hash + get_trimmed_tree_range_inverse_hash_nonzero_len + get_pe_element_cache + get_rpe_element_cache /], uses_nbr_lists => 1, indices => { @@ -336,9 +385,21 @@ sub calc_phylo_rpe2 { my $self = shift; my %args = @_; - if (!@{$args{element_list2} // []}) { + my $pe_p_score = $args{PE_WE_P}; + + # no point calculating anything if PE is undef + if (!defined $pe_p_score) { + my %results = ( + PHYLO_RPE2 => undef, + PHYLO_RPE_NULL2 => undef, + PHYLO_RPE_DIFF2 => undef, + ); + return wantarray ? %results : \%results; + } + + if (!@{$args{element_list2} // []} || !($args{C} // 1) ) { # We just copy the calc_phylo_rpe_central results - # if there are no nbrs in set2 + # if there are no nbrs or no different labels in set2 my $cache_hash = $self->get_param('AS_RESULTS_FROM_LOCAL'); if (my $cached = $cache_hash->{calc_phylo_rpe_central}) { my %results; @@ -351,62 +412,55 @@ sub calc_phylo_rpe2 { } } - my $orig_tree_ref = $args{trimmed_tree}; my $orig_total_tree_length = $orig_tree_ref->get_total_tree_length; my $null_tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED}; my $null_total_tree_length = $null_tree_ref->get_total_tree_length; - my $pe_p_score = $args{PE_WE_P}; - my $pe_score = $args{PE_WE}; + my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; + \my %range_inverse = $args{trimmed_tree_range_inverse_hash_nonzero_len}; - # no point calculating anything if PE is undef - if (!defined $pe_score) { - my %results = ( - PHYLO_RPE2 => undef, - PHYLO_RPE_NULL2 => undef, - PHYLO_RPE_DIFF2 => undef, - ); - return wantarray ? %results : \%results; - } + my $element_list_all = $args{element_list_all}; - my $node_ranges_local = $args{PE_LOCAL_RANGELIST}; - my $node_ranges_global = $args{PE_RANGELIST}; - my $null_node_len_hash = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH_HASH}; - my $default_eq_len = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED_NODE_LENGTH}; - # Get the PE score assuming equal branch lengths my ($pe_null, $null, $phylo_rpe2, $diff); - # First condition optimises for the common case where all local ranges are 1 - my $zero_len_branch_names = $null_tree_ref->get_zero_node_length_hash; - if (($args{EL_COUNT_ALL} // $args{EL_COUNT_SET1} // 0) == 1 - && scalar keys %$zero_len_branch_names < 10 # arbitrary number - ) { - delete local @$node_ranges_global{keys %$zero_len_branch_names}; - $pe_null += (1 / $_) foreach values %$node_ranges_global; - $pe_null *= $default_eq_len; - #say STDERR 'jjjj'; - } - elsif (HAVE_BD_UTILS) { - $pe_null = Biodiverse::Utils::get_rpe_null ( - $null_node_len_hash, - $node_ranges_local, - $node_ranges_global, - ); - } - else { - foreach my $null_node (keys %$node_ranges_global) { - $pe_null += $null_node_len_hash->{$null_node} - * $node_ranges_local->{$null_node} - / $node_ranges_global->{$null_node}; + my $results_cache = $args{RPE_RESULTS_CACHE}; + my $pe_results_cache = $args{PE_RESULTS_CACHE}; + + foreach my $group (@$element_list_all) { + my $results_this_gp; + # use the cached results for a group if present + if (exists $results_cache->{$group}) { + $results_this_gp = $results_cache->{$group}; + } + # else build them and cache them + else { + # precalcs mean this should exist + my $pe_cached = $pe_results_cache->{$group} + // croak "PE cache entry for $group not yet calculated"; + my $nodes_in_path = $pe_cached->{PE_WTLIST}; + + my $gp_score; + $gp_score += $_ foreach @range_inverse{keys %$nodes_in_path}; + $gp_score *= $default_eq_len if $gp_score; + + $results_this_gp = { RPE_WE => $gp_score }; + + $results_cache->{$group} = $results_this_gp; + } + + if (defined $results_this_gp->{RPE_WE}) { + $pe_null += $results_this_gp->{RPE_WE}; } + } { no warnings qw /numeric uninitialized/; - $null = eval {$pe_null / $null_total_tree_length}; # equiv to PE_WE_P for the equalised tree + # null is equiv to PE_WE_P for the equalised tree + $null = eval {$pe_null / $null_total_tree_length}; $phylo_rpe2 = eval {$pe_p_score / $null}; $diff = eval {$orig_total_tree_length * ($pe_p_score - $null)}; } @@ -667,21 +721,20 @@ sub get_metadata_get_trimmed_tree_eq_branch_lengths_node_length_hash { return $metadata_class->new(\%metadata); } -# should just be a wrapper around Tree::get_node_length_hash sub get_trimmed_tree_eq_branch_lengths_node_length_hash { my $self = shift; my %args = @_; my $tree_ref = $args{TREE_REF_EQUALISED_BRANCHES_TRIMMED} // croak 'Missing TREE_REF_EQUALISED_BRANCHES_TRIMMED arg'; - my $node_hash = $tree_ref->get_node_hash; - - my (%len_hash, $nonzero_length); - foreach my $node_name (keys %$node_hash) { - my $node_ref = $node_hash->{$node_name}; - my $length = $node_ref->get_length; - $len_hash{$node_name} = $length; - $nonzero_length ||= $length; + + \my %len_hash = $tree_ref->get_node_length_hash; + + my $nonzero_length; + + foreach my $len (values %len_hash) { + $nonzero_length ||= $len; + last if $len; } my %results = ( @@ -692,5 +745,102 @@ sub get_trimmed_tree_eq_branch_lengths_node_length_hash { return wantarray ? %results : \%results; } +sub get_metadata_get_trimmed_tree_range_inverse_hash { + my %metadata = ( + name => 'get_trimmed_tree_range_inverse_hash', + description + => "Get a hash of the node range inverse values\n" + . "Forms the basis of the RPE calcs for equal area cells", + pre_calc_global => ['get_node_range_hash'], + indices => { + trimmed_tree_range_inverse_hash => { + description => 'Hash of trimmed tree range inverse values', + }, + }, + ); + return $metadata_class->new(\%metadata); +} + +sub get_trimmed_tree_range_inverse_hash { + my $self = shift; + my %args = @_; + + # my $tree = $args{TRIMMED_TREE}; + my $node_ranges = $args{node_range}; + + my %range_weighted; + + foreach my $name (keys %$node_ranges) { + my $range = $node_ranges->{$name} || next; + $range_weighted{$name} = 1 / $range; + } + + my %results = (trimmed_tree_range_inverse_hash => \%range_weighted); + + return wantarray ? %results : \%results; +} + +sub get_metadata_get_trimmed_tree_range_inverse_hash_nonzero_len { + my %metadata = ( + name => 'get_trimmed_tree_range_inverses_nonzero_len', + description + => "Get a hash of the node range inverse values for non-zero lengths\n" + . "Forms the basis of the RPE calcs for equal area cells", + pre_calc_global => ['get_node_range_hash', 'get_trimmed_tree'], + indices => { + trimmed_tree_range_inverse_hash_nonzero_len => { + description => 'Hash of trimmed tree range inverse values for nodes with non-zero length', + }, + }, + ); + return $metadata_class->new(\%metadata); +} + +sub get_trimmed_tree_range_inverse_hash_nonzero_len { + my $self = shift; + my %args = @_; + + my $tree = $args{trimmed_tree}; + \my %node_ranges = $args{node_range}; + \my %length_hash = $tree->get_node_length_hash; + + my %range_weighted; + + foreach my $name (keys %node_ranges) { + my $range = $node_ranges{$name} || next; + $range_weighted{$name} = ($length_hash{$name} ? 1 : 0) / $range; + } + + my %results = (trimmed_tree_range_inverse_hash_nonzero_len => \%range_weighted); + + return wantarray ? %results : \%results; +} + + + +sub get_metadata_get_rpe_element_cache { + + my %metadata = ( + name => 'get_rpe_element_cache', + description => 'Create a hash in which to cache the PE_alt scores for each element', + indices => { + RPE_RESULTS_CACHE => { + description => 'The hash in which to cache the PE_alt scores for each element' + }, + }, + ); + + return $metadata_class->new(\%metadata); +} + +# create a hash in which to cache the PE scores for each element +# this is called as a global precalc and then used or modified by each element as needed +sub get_rpe_element_cache { + my $self = shift; + my %args = @_; + + my %results = (RPE_RESULTS_CACHE => {}); + return wantarray ? %results : \%results; +} 1; diff --git a/lib/Biodiverse/Indices/RWTurnover.pm b/lib/Biodiverse/Indices/RWTurnover.pm index ccfc9bb58..0f0217ad6 100644 --- a/lib/Biodiverse/Indices/RWTurnover.pm +++ b/lib/Biodiverse/Indices/RWTurnover.pm @@ -341,16 +341,15 @@ sub _calc_pe_lists_per_element_set { BY_LIST: foreach my $list_name (qw /element_list1 element_list2/) { $i++; # start at 1 so we match the numbered names - my $el_list = $args{$list_name} // next BY_LIST; - \my @elements = $el_list; # FIXME + \my @elements = $args{$list_name} // next BY_LIST; my $have_cache = (@elements == 1 && $cache->{$elements[0]}); $results[$i] = $have_cache ? $cache->{$elements[0]} - : $self->_calc_pe( + : $self->_calc_pe_lists( %args, - element_list_all => \@elements, - ); + element_list_all => $args{$list_name}, + ); $cache->{$elements[0]} = $results[$i] if @elements == 1; } diff --git a/lib/Biodiverse/Tree.pm b/lib/Biodiverse/Tree.pm index 3d20b53ba..be6a47a69 100644 --- a/lib/Biodiverse/Tree.pm +++ b/lib/Biodiverse/Tree.pm @@ -3095,20 +3095,17 @@ sub clone_tree_with_equalised_branch_lengths { my $name = $args{name} // ( $self->get_param('NAME') . ' EQ' ); - my $non_zero_len = $args{node_length}; + my $non_zero_len = $args{node_length} + // ($self->get_total_tree_length / ( $self->get_nonzero_length_count || 1 )); - if ( !defined $non_zero_len ) { - # my $non_zero_node_count = grep { $_->get_length } $self->get_node_refs; - # this caches - my $non_zero_node_count = $self->get_nonzero_length_count; - $non_zero_len = - $self->get_total_tree_length / ( $non_zero_node_count || 1 ); - } + \my %orig_node_length_hash = $self->get_node_length_hash; my $new_tree = $self->clone_without_caches; + \my %new_node_hash = $new_tree->get_node_hash; - foreach my $node ( $new_tree->get_node_refs ) { - $node->set_length_aa ( $node->get_length ? $non_zero_len : 0 ); + foreach my $name ( keys %new_node_hash ) { + my $node = $new_node_hash{$name}; + $node->set_length_aa ( $orig_node_length_hash{$name} ? $non_zero_len : 0 ); } $new_tree->rename( new_name => $name ); diff --git a/t/26-Cluster2.t b/t/26-Cluster2.t index 0100c5278..052f24325 100644 --- a/t/26-Cluster2.t +++ b/t/26-Cluster2.t @@ -219,12 +219,12 @@ sub test_rw_turnover_mx { sub test_cluster_node_calcs { my %args = @_; - my $bd = $args{basedata_ref} || get_basedata_object_from_site_data(CELL_SIZES => [300000, 300000]); + my $bd = $args{basedata_ref} || get_basedata_object_from_site_data(CELL_SIZES => [400000, 400000]); my $prng_seed = $args{prng_seed} || $default_prng_seed; my $tree_ref = $args{tree_ref} || get_tree_object_from_sample_data(); - my $calcs = [qw/calc_pe calc_pd/]; + my $calcs = [qw/calc_pe calc_pe_lists calc_phylo_rpe2 calc_pe_central_lists calc_pd/]; my $cl1 = $bd->add_cluster_output (name => 'cl1'); $cl1->run_analysis ( @@ -250,6 +250,7 @@ sub test_cluster_node_calcs { is [sort keys %$node_hash1], [sort keys %$node_hash2], 'paranoia check: same node names'; + my $prec = "%.10f"; my (%aggregate1, %aggregate2); foreach my $node_name (sort keys %$node_hash1) { my $node1 = $node_hash1->{$node_name}; @@ -258,8 +259,8 @@ sub test_cluster_node_calcs { foreach my $list_name (sort grep {$_ !~ /NODE_VALUES/}$node1->get_hash_lists) { my $ref1 = $node1->get_list_ref_aa($list_name); my $ref2 = $node2->get_list_ref_aa($list_name); - my $snapped1 = {map {$_ => sprintf "%.10f", $ref1->{$_}} keys %$ref1}; - my $snapped2 = {map {$_ => sprintf "%.10f", $ref2->{$_}} keys %$ref2}; + my $snapped1 = {map {$_ => snap_to_precision ($ref1->{$_}, $prec)} keys %$ref1}; + my $snapped2 = {map {$_ => snap_to_precision ($ref2->{$_}, $prec)} keys %$ref2}; $aggregate1{$node_name}{$list_name} = $snapped1; $aggregate2{$node_name}{$list_name} = $snapped2; }