diff --git a/lib/Biodiverse/Cluster.pm b/lib/Biodiverse/Cluster.pm
index 002a47c0c..61c3a2593 100644
--- a/lib/Biodiverse/Cluster.pm
+++ b/lib/Biodiverse/Cluster.pm
@@ -2777,7 +2777,9 @@ sub sp_calc {
     );
 
     # drop out if we have none to do
-    return if $indices_object->get_valid_calculation_count == 0;
+    return if $indices_object->get_valid_calculation_count == 0;
+
+    $indices_object->set_hierarchical_mode(!$args{no_hierarchical_mode});
 
     delete $args{calculations};  # saves passing it onwards when we call the calculations
     delete $args{analyses};      # for backwards compat
@@ -2809,16 +2811,26 @@ sub sp_calc {
             $count / $to_do,
         );
 
+        # needs a better name
+        my $current_node_details = {
+            name        => $node->get_name,
+            child_names => [map {$_->get_name} $node->get_children],
+        };
+
         my %sp_calc_values = $indices_object->run_calculations(
             %args,
-            element_list1 => [keys %{$node->get_terminal_elements}]
+            element_list1        => [ keys %{$node->get_terminal_elements} ],
+            current_node_details => $current_node_details,
         );
 
         foreach my $key (keys %sp_calc_values) {
            if (is_arrayref($sp_calc_values{$key}) || is_hashref($sp_calc_values{$key})) {
-                $node->add_to_lists ($key => $sp_calc_values{$key});
+                $node->add_to_lists (
+                    $key => $sp_calc_values{$key},
+                    use_ref => 1,
+                );
                 delete $sp_calc_values{$key};
             }
         }
@@ -2830,6 +2842,8 @@ sub sp_calc {
 
     $self->delete_cached_metadata;
 
+    $indices_object->set_hierarchical_mode(0);
+
     return 1;
 }
diff --git a/lib/Biodiverse/Common.pm b/lib/Biodiverse/Common.pm
index cf34d7ba8..feb2d0f38 100644
--- a/lib/Biodiverse/Common.pm
+++ b/lib/Biodiverse/Common.pm
@@ -266,12 +266,18 @@ sub load_yaml_file {
     croak 'Loading from a YAML file is no longer supported';
 }
 
+# Orig should never have used a hash.  Oh well.
+sub set_basedata_ref_aa {
+    my ($self, $ref) = @_;
+    $self->set_basedata_ref(BASEDATA_REF => $ref);
+}
+
 sub set_basedata_ref {
     my $self = shift;
     my %args = @_;
 
     $self->set_param (BASEDATA_REF => $args{BASEDATA_REF});
-    $self->weaken_basedata_ref;
+    $self->weaken_basedata_ref if defined $args{BASEDATA_REF};
 
     return;
 }
@@ -279,10 +285,7 @@ sub set_basedata_ref {
 
 sub get_basedata_ref {
     my $self = shift;
 
-    my $bd = $self->get_param ('BASEDATA_REF')
-      || Biodiverse::MissingBasedataRef->throw (
-          message => 'Parameter BASEDATA_REF not set'
-      );
+    my $bd = $self->get_param ('BASEDATA_REF');
 
     return $bd;
 }
@@ -2415,18 +2418,19 @@ sub compare_lists_by_item {
     \my %base_ref = $args{base_list_ref};
     \my %comp_ref = $args{comp_list_ref};
     \my %results  = $args{results_list_ref};
+    my ($diff, $increment);
 
   COMP_BY_ITEM:
     foreach my $index (keys %base_ref) {
         next COMP_BY_ITEM
-          if !(defined $base_ref{$index} && defined $comp_ref{$index});
+          if !(defined $comp_ref{$index} && defined $base_ref{$index});
 
         # compare at 10 decimal place precision
         # this also allows for serialisation which
         # rounds the numbers to 15 decimals
-        my $diff = $base_ref{$index} - $comp_ref{$index};
-        my $increment = $diff > DEFAULT_PRECISION_SMALL ? 1 : 0;
+        $diff = $base_ref{$index} - $comp_ref{$index};
+        $increment = $diff > DEFAULT_PRECISION_SMALL ? 1 : 0;
 
         # for debug, but leave just in case
         #carp "$element, $op\n$comp\n$base " . ($comp - $base) if $increment;
@@ -2438,18 +2442,18 @@ sub compare_lists_by_item {
         # SUMX is the sum of compared values
         # SUMXX is the sum of squared compared values
         # The latter two are used in z-score calcs
-        $results{"C_$index"} += $increment;
-        $results{"Q_$index"} ++;
-        $results{"P_$index"} =  $results{"C_$index"}
-                              / $results{"Q_$index"};
+        # obfuscated to squeeze as much speed as we can
+        # $results{"C_$index"} += $increment;
+        # $results{"Q_$index"} ++;
+        $results{"P_$index"} = ($results{"C_$index"} += $increment)
+                             / (++$results{"Q_$index"});
 
         # use original vals for sums
-        $results{"SUMX_$index"}  += $comp_ref{$index};
-        $results{"SUMXX_$index"} += ($comp_ref{$index}**2);
+        $results{"SUMX_$index"}  += $comp_ref{$index};
+        $results{"SUMXX_$index"} += ($comp_ref{$index}**2);
 
         # track the number of ties
-        if (abs($diff) <= DEFAULT_PRECISION_SMALL) {
-            $results{"T_$index"} ++;
-        }
+        $results{"T_$index"} ++
+          if (abs($diff) <= DEFAULT_PRECISION_SMALL);
     }
 
     return;
@@ -2513,35 +2517,33 @@ sub get_zscore_from_comp_results {
     my %args = @_;
 
     # could alias this
-    my $comp_list_ref = $args{comp_list_ref}
+    \my %comp_list_ref = $args{comp_list_ref}
      // croak "comp_list_ref argument not specified\n";
 
     # need the observed values
-    my $base_list_ref = $args{base_list_ref}
+    \my %base_list_ref = $args{base_list_ref}
      // croak "base_list_ref argument not specified\n";
 
     my $results_list_ref = $args{results_list_ref} // {};
 
  KEY:
-    foreach my $q_key (grep {$_ =~ /^Q_/} keys %$comp_list_ref) {
-        my $index_name = substr $q_key, 2;
+    foreach my $index_name (keys %base_list_ref) {
 
-        my $n = $comp_list_ref->{$q_key};
+        my $n = $comp_list_ref{'Q_' . $index_name};
         next KEY if !$n;
 
         my $x_key  = 'SUMX_'  . $index_name;
         my $xx_key = 'SUMXX_' . $index_name;
 
         # sum of x vals and x vals squared
-        my $sumx  = $comp_list_ref->{$x_key};
-        my $sumxx = $comp_list_ref->{$xx_key};
+        my $sumx  = $comp_list_ref{$x_key};
+        my $sumxx = $comp_list_ref{$xx_key};
 
-        my $z_key = $index_name;
         # n better be large, as we do not use n-1
-        my $variance = max (0, ($sumxx - ($sumx**2) / $n) / $n);
-        my $obs = $base_list_ref->{$index_name};
-        $results_list_ref->{$z_key}
-          = $variance
-          ? ($obs - ($sumx / $n)) / sqrt ($variance)
+        my $variance = ($sumxx - ($sumx**2) / $n) / $n;
+
+        $results_list_ref->{$index_name}
+          = $variance > 0
+          ? ($base_list_ref{$index_name} - ($sumx / $n)) / sqrt ($variance)
          : 0;
     }
@@ -2666,15 +2668,19 @@ sub get_sig_rank_from_comp_results {
     my $self = shift;
     my %args = @_;
 
-    # could alias this
     \my %comp_list_ref = $args{comp_list_ref}
      // croak "comp_list_ref argument not specified\n";
 
     \my %results_list_ref = $args{results_list_ref} // {};
 
-    foreach my $c_key (grep {$_ =~ /^C_/} keys %comp_list_ref) {
-
-        my $index_name = substr $c_key, 2;
+    # base_list_ref will usually be shorter so fewer comparisons will be needed
+    my @keys = $args{base_list_ref}
+        ? grep {exists $comp_list_ref{'C_' . $_}} keys %{$args{base_list_ref}}
+        : map {substr $_, 2} grep {$_ =~ /^C_/} keys %comp_list_ref;
+
+    foreach my $index_name (@keys) {
+
+        my $c_key = 'C_' . $index_name;
 
         if (!defined $comp_list_ref{$c_key}) {
             $results_list_ref{$index_name} = undef;
diff --git a/lib/Biodiverse/Indices.pm b/lib/Biodiverse/Indices.pm
index b4650b51e..48cbec882 100644
--- a/lib/Biodiverse/Indices.pm
+++ b/lib/Biodiverse/Indices.pm
@@ -12,7 +12,7 @@ use warnings;
 #use Data::Dumper;
 use Scalar::Util qw /blessed weaken/;
 use List::MoreUtils qw /uniq/;
-use List::Util qw /sum/;
+use List::Util qw /sum any/;
 use English ( -no_match_vars );
 use Ref::Util qw { :all };
 use JSON::MaybeXS;
@@ -794,18 +794,10 @@ sub parse_dependencies_for_calc {
             $self->_convert_to_array( input => $required_args );
 
        foreach my $required_arg ( sort @$reqd_args_a ) {
-            my $re = qr /^($required_arg)$/
-              ;    # match is used in the grep?  Was used in now-removed code.
-            my $is_defined;
-          CALC_ARG:
-            foreach
-              my $calc_arg ( sort grep { $_ =~ $re } keys %$calc_args )
-            {
-                if ( defined $calc_args->{$calc_arg} ) {
-                    $is_defined++;
-                    last CALC_ARG;
-                }
-            }
+            my $re = qr /^($required_arg)$/;
+            my $is_defined
+                = any { $_ =~ $re && defined $calc_args->{$_}}
+                  sort keys %$calc_args;
 
             if ( !$is_defined ) {
                 Biodiverse::Indices::MissingRequiredArguments->throw(
@@ -1533,11 +1525,16 @@ sub run_dependencies {
     my $tmp = $self->get_param('AS_RESULTS_FROM_GLOBAL') || {};
     my %as_results_from_global = %$tmp;    # make a copy
 
+    state $cache_name_local_results = 'AS_RESULTS_FROM_LOCAL';
+
     # Now we run the calculations at this level.
     # We also keep track of what has been run
     # to avoid repetition through multiple dependencies.
     my %results;
     my %as_results_from;
+    # make sure this is new each iteration
+    $self->set_cached_value ($cache_name_local_results => \%as_results_from);
+
     foreach my $calc (@$calc_list) {
         my $calc_results;
@@ -1574,6 +1571,9 @@ sub run_dependencies {
         $results{$calc} = $calc_results;
     }
 
+    # We refresh each call above, but this ensures last one is cleaned up.
+    $self->delete_cached_value($cache_name_local_results);
+
     if ( $type eq 'pre_calc_global' ) {
         $self->set_param( AS_RESULTS_FROM_GLOBAL => \%as_results_from_global );
     }
@@ -1663,6 +1663,9 @@ sub set_pairwise_mode {
 
     $self->{pairwise_mode} = $mode;
 
+    croak "Cannot have both pairwise and hierarchical modes on at the same time"
+      if $mode && $self->get_hierarchical_mode;
+
     return $mode;
 }
@@ -1671,6 +1674,24 @@ sub get_pairwise_mode {
     $_[0]->{pairwise_mode};
 }
 
+sub set_hierarchical_mode {
+    my ( $self, $mode ) = @_;
+
+    $self->{hierarchical_mode} = $mode;
+
+    croak "Cannot have both pairwise and hierarchical modes on at the same time"
+      if $mode && $self->get_pairwise_mode;
+
+    return $mode;
+}
+
+# potential hot path so optimise to avoid arg handling
+sub get_hierarchical_mode {
+    $_[0]->{hierarchical_mode};
+}
+
+
+
 1;
 
 __END__
diff --git a/lib/Biodiverse/Indices/Endemism.pm b/lib/Biodiverse/Indices/Endemism.pm
index 97d5a4f81..03cd22ce4 100644
--- a/lib/Biodiverse/Indices/Endemism.pm
+++ b/lib/Biodiverse/Indices/Endemism.pm
@@ -6,6 +6,9 @@ use 5.020;
 
 our $VERSION = '4.99_002';
 
+use experimental 'refaliasing';
+
+
 my $metadata_class = 'Biodiverse::Metadata::Indices';
 
 sub get_metadata_calc_endemism_central_normalised {
@@ -295,11 +298,24 @@
 }
 
 sub calc_endemism_central_hier_part {
-    my $self = shift;
+    my ($self, %args) = @_;
+
+    # If we have no nbrs in set 2 then we are the same as the "whole" variant.
+    # So just grab its values if it has already been calculated.
+    if (!keys %{$args{label_hash2}}) {
+        my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
+        my $cached = $cache_hash->{calc_endemism_whole_hier_part};
+        if ($cached) {
+            my %remapped;
+            @remapped{map {$_ =~ s/ENDW/ENDC/r} keys %$cached}
+              = values %$cached;
+            return wantarray ? %remapped : \%remapped;
+        }
+    }
 
     return $self->_calc_endemism_hier_part (
-        @_,
-        prefix => 'ENDC_HPART_',
+        %args,
+        prefix => 'ENDC_HPART_',
     );
 }
@@ -314,11 +330,25 @@ sub get_metadata_calc_endemism_whole_hier_part {
 }
 
 sub calc_endemism_whole_hier_part {
-    my $self = shift;
+    my ($self, %args) = @_;
+
+    # If we have no nbrs in set 2 then we are the same as the "central" variant.
+    # So just grab its values if it has already been calculated.
+    if (!keys %{$args{label_hash2}}) {
+        my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
+        my $cached = $cache_hash->{calc_endemism_central_hier_part};
+        if ($cached) {
+            # say STDERR join ' ', sort keys %$cached;
+            my %remapped;
+            @remapped{map {$_ =~ s/ENDC/ENDW/r} keys %$cached}
+              = values %$cached;
+            return wantarray ? %remapped : \%remapped;
+        }
+    }
 
     return $self->_calc_endemism_hier_part (
-        @_,
-        prefix => 'ENDW_HPART_',
+        %args,
+        prefix => 'ENDW_HPART_',
     );
 }
@@ -404,6 +434,7 @@ sub metadata_for_calc_endemism_hier_part {
         formula         => $formula,
         pre_calc        => [
             "_calc_endemism_$endemism_type",
+            'calc_abc2',
         ],
         pre_calc_global => 'get_basedata_labels_as_tree',
         uses_nbr_lists  => 1,  # how many sets of lists it must have
@@ -429,21 +460,24 @@ sub _calc_endemism_hier_part {
     my @hash_ref_array = ();
     my @count_array    = ();
 
-    my $total_count = 0;
-    while (my ($label, $wt) = each %$wt_list) {
-        my $contribution = $wt / $we;
-        $total_count ++;
-        my $node_ref = $tree->get_node_ref (node => $label);
-        my $node_name = $label;
-
-        # climb the tree and add the contributions
+    my $total_count = keys %$wt_list;
+
+    foreach my $label (keys %$wt_list) {
+        my $contribution = $wt_list->{$label} / $we;
+
+        my $node_ref = $tree->get_node_ref_aa ($label);
+
+        # Climb the tree and add the contributions.
+        # Depth is off by one so the root is $i==-1.
         my $i = $depth;
-        while (! $node_ref->is_root_node) {
+        # this call caches
+        my $path = $node_ref->get_path_name_array_to_root_node_aa;
+
+        foreach my $node_name (@$path) {
             $hash_ref_array[$i]{$node_name} += $contribution;
             $count_array[$i]{$node_name} ++;
             $i--;
-            $node_ref  = $node_ref->get_parent;
-            $node_name = $node_ref->get_name;
+            last if $i < 0;
         }
     }
@@ -549,6 +583,15 @@ sub _calc_endemism_central {
     my $self = shift;
     my %args = @_;
 
+    # If we have no nbrs in set 2 then we are the same as the "whole" variant.
+    # So just grab its values if it has already been calculated.
+    if (!keys %{$args{label_hash2}}) {
+        my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
+        my $cached = $cache_hash->{_calc_endemism_whole};
+        return wantarray ? %$cached : $cached
+          if $cached;
+    }
+
     return $self->_calc_endemism(%args, end_central => 1);
 }
@@ -688,8 +731,18 @@ sub get_metadata__calc_endemism_whole {
 
 # wrapper sub
 sub _calc_endemism_whole {
-    my $self = shift;
-    return $self->_calc_endemism(@_, end_central => 0);
+    my ($self, %args) = @_;
+
+    # If we have no nbrs in set 2 then we are the same as the "central" variant.
+    # So just grab its values if it has already been calculated.
+    if (!keys %{$args{label_hash2}}) {
+        my $cache_hash = $self->get_cached_value('AS_RESULTS_FROM_LOCAL');
+        my $cached = $cache_hash->{_calc_endemism_central};
+        return wantarray ? %$cached : $cached
+          if $cached;
+    }
+
+    return $self->_calc_endemism(%args, end_central => 0);
 }
@@ -701,8 +754,7 @@ sub _calc_endemism {
 
     my $bd = $self->get_basedata_ref;
 
     # if element_list2 is specified and end_central == 1,
-    # then it will consider those elements in the local range calculations,
-    # but only use those labels that occur in the element_list1
+    # then it will use the local ranges across sets 1 and 2
     my $local_ranges = $args{label_hash_all};
     my $label_list = $args{end_central}
@@ -874,8 +926,9 @@ sub get_metadata__calc_endemism_absolute {
     my %metadata = (
         description     => $desc,
         name            => 'Absolute endemism, internals',
-        uses_nbr_lists  => 1,  # how many sets of lists it must have
-        pre_calc        => ['calc_abc2'],
+        uses_nbr_lists  => 1,  # how many sets of lists it must have
+        pre_calc        => [ 'calc_abc2' ],
+        pre_calc_global => ['get_label_range_hash']
     );  # add to if needed
 
     return $metadata_class->new(\%metadata);
@@ -889,9 +942,11 @@ sub _calc_endemism_absolute {
 
     my $bd = $self->get_basedata_ref;
 
-    my $local_ranges = $args{label_hash_all};
-    my $l_hash1 = $args{label_hash1};
-    my $l_hash2 = $args{label_hash2};
+    \my %local_ranges = $args{label_hash_all};
+    \my %l_hash1 = $args{label_hash1};
+    \my %l_hash2 = $args{label_hash2};
+
+    \my %ranges = $args{label_range_hash};
 
     # allows us to use this for any other basedata get_* function
     my $function = 'get_range';
@@ -899,27 +954,28 @@ sub _calc_endemism_absolute {
     my ($end1, $end2, $end_all) = (0, 0, 0);
     my (%eh1, %eh2, %eh_all);
 
-    while (my ($sub_label, $local_range) = each %{$local_ranges}) {
-        my $range = $bd->$function (element => $sub_label);
+    foreach my $sub_label (keys %local_ranges) {
+        my $local_range = $local_ranges{$sub_label};
+        my $range = $ranges{$sub_label};
 
         next if $range > $local_range;  # cannot be absolutely endemic
 
         $end_all++;
         $eh_all{$sub_label} = $local_range;
 
-        if (exists $l_hash1->{$sub_label} and $range <= $l_hash1->{$sub_label}) {
+        if ($l_hash1{$sub_label} and $range <= $l_hash1{$sub_label}) {
             $end1++;
             $eh1{$sub_label} = $local_range;
         }
 
-        if (exists $l_hash2->{$sub_label} and $range <= $l_hash2->{$sub_label}) {
+        if ($l_hash2{$sub_label} and $range <= $l_hash2{$sub_label}) {
            $end2++;
            $eh2{$sub_label} = $local_range;
        }
     }
 
-    my $end1_p    = eval {$end1 / scalar keys %$l_hash1};
-    my $end2_p    = eval {$end2 / scalar keys %$l_hash2};
-    my $end_all_p = eval {$end_all / scalar keys %$local_ranges};
+    my $end1_p    = eval {$end1 / scalar keys %l_hash1};
+    my $end2_p    = eval {$end2 / scalar keys %l_hash2};
+    my $end_all_p = eval {$end_all / scalar keys %local_ranges};
 
     my %results = (
         END_ABS1  => $end1,
diff --git a/lib/Biodiverse/Indices/Phylogenetic.pm b/lib/Biodiverse/Indices/Phylogenetic.pm
index 707bcdb05..ab6e17918 100644
--- a/lib/Biodiverse/Indices/Phylogenetic.pm
+++ b/lib/Biodiverse/Indices/Phylogenetic.pm
@@ -576,14 +576,20 @@ sub get_path_lengths_to_root_node {
     my $cache = !$args{no_cache};
     #$cache = 0;  # turn it off for debug
     my $el_list = $args{el_list} // [];
-    
+
+    return $self->_get_path_lengths_to_root_node_hierarchical(%args)
+      if defined $args{current_node_details}
+        && $self->get_hierarchical_mode
+        && scalar @{$args{element_list1} //[]} > 1;
+
     # have we cached it?
-    #my $use_path_cache = $cache && $self->get_pairwise_mode();
+    # caching makes sense only if we have
+    # only one element (group) containing labels
+    # or we are in hierarchical mode, but that is handled separately
     my $use_path_cache
       = $cache
      && $self->get_param('USE_PATH_LENGTH_CACHE_BY_GROUP')
-      && scalar @$el_list == 1;  # caching makes sense only if we have
-                                 # only one element (group) containing labels
+      && scalar @$el_list == 1;
 
     if ($use_path_cache) {
         my $cache_h = $args{path_length_cache};
@@ -679,6 +685,33 @@ sub get_path_lengths_to_root_node {
 
     return wantarray ? %$path_hash : $path_hash;
 }
 
+sub _get_path_lengths_to_root_node_hierarchical {
+    my ($self, %args) = @_;
+
+    my $node_data = $args{current_node_details}
+      // croak 'Must pass the current node details when in hierarchical mode';
+    my $node_name = $node_data->{name}
+      // croak 'Missing current node name in hierarchical mode';
+    my $child_names = $node_data->{child_names};
+
+    my $cache_h = $args{path_length_cache};
+
+    my %path_combined;
+
+    foreach my $child (@$child_names) {
+        my $path = $cache_h->{$child};
+        if (!$path) {
+            # need to calculate it
+            delete local $args{current_node_details};
+            $path = $self->get_path_lengths_to_root_node(%args);
+        }
+        @path_combined{keys %$path} = values %$path;
+    }
+    $cache_h->{$node_name} = \%path_combined;
+
+    return wantarray ? %path_combined : \%path_combined;
+}
+
 sub get_metadata_calc_pe {
@@ -1627,6 +1660,15 @@ sub calc_labels_not_on_tree {
 
     my $not_on_tree = $args{labels_not_on_tree};
 
+    if (!keys %$not_on_tree) {
+        my $res = {
+            PHYLO_LABELS_NOT_ON_TREE   => {},
+            PHYLO_LABELS_NOT_ON_TREE_N => 0,
+            PHYLO_LABELS_NOT_ON_TREE_P => 0,
+        };
+        return wantarray ? %$res : $res;
+    }
+
     my %labels1 = %{$args{label_hash_all}};
     my $richness = scalar keys %labels1;
     delete @labels1{keys %$not_on_tree};
diff --git a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm
index 1ed764e44..c4f42c8a3 100644
--- a/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm
+++ b/lib/Biodiverse/Indices/Phylogenetic/RefAlias.pm
@@ -1,6 +1,9 @@
 package Biodiverse::Indices::Phylogenetic::RefAlias;
 use strict;
 use warnings;
+use 5.022;
+
+use Carp qw/croak/;
 
 our $VERSION = '4.99_002';
@@ -11,11 +14,17 @@ use List::Util qw /sum/;
 
 sub _calc_pe {
     my $self = shift;
-    my %args = @_;
+    my %args = @_;
+
+    my $element_list_all = $args{element_list_all};
+
+    return $self->_calc_pe_hierarchical(%args)
+      if defined $args{current_node_details}
+        && $self->get_hierarchical_mode
+        && @$element_list_all > 1;
 
     my $tree_ref = $args{trimmed_tree};
     my $results_cache = $args{PE_RESULTS_CACHE};
-    my $element_list_all = $args{element_list_all};
 
     \my %node_ranges = $args{node_range};
     \my %rw_node_lengths = $args{inverse_range_weighted_node_lengths};
@@ -148,6 +157,102 @@ sub _calc_pe {
     return wantarray ? %results : \%results;
 }
 
+# _calc_pe but taking advantage of hierarchical structures
+# requires it be called bottom up
+sub _calc_pe_hierarchical {
+    my ($self, %args) = @_;
+
+    my $element_list_all = $args{element_list_all};
+
+    my $node_data = $args{current_node_details}
+      // croak 'Must pass the current node details when in hierarchical mode';
+    my $node_name = $node_data->{name}
+      // croak 'Missing current node name in hierarchical mode';
+    my $child_names = $node_data->{child_names};
+
+    my $tree_ref      = $args{trimmed_tree};
+    my $results_cache = $args{PE_RESULTS_CACHE};
+    \my %node_ranges  = $args{node_range};
+
+    # default these to undef - more meaningful than zero
+    my ($PE_WE, $PE_WE_P);
+
+    my (%wts, %local_ranges, %results);
+
+    foreach my $group (@$child_names) {
+        my $results_this_gp;
+        # use the cached results for a group if present
+        if (exists $results_cache->{$group}) {
+            $results_this_gp = $results_cache->{$group};
+        }
+        else {
+            # do it the hard way
+            delete local $args{current_node_details};
+            $results_this_gp
+              = $results_cache->{$group}
+              = $self->_calc_pe (%args);
+        }
+
+        if (defined $results_this_gp->{PE_WE}) {
+            $PE_WE += $results_this_gp->{PE_WE};
+        }
+
+        # Avoid some redundant slicing and dicing when we have only one group
+        # Pays off when processing large data sets
+        if (scalar @$element_list_all == 1) {
+            # no need to collate anything so make a shallow copy
+            @results{keys %$results_this_gp} = values %$results_this_gp;
+            # but we do need to add to the local range hash
+            my $hashref = $results_this_gp->{PE_WTLIST};
+            @local_ranges{keys %$hashref} = (1) x scalar keys %$hashref;
+        }
+        else {
+            # refalias might be a nano-optimisation here...
+            \my %wt_hash = $results_this_gp->{PE_WTLIST};
+            # weights need to be summed,
+            # unless we are starting from a blank slate
+            if (keys %wts) {
+                foreach my $node (keys %wt_hash) {
+                    $wts{$node} += $wt_hash{$node};
+                    $local_ranges{$node}++;
+                }
+            }
+            else {
+                %wts = %wt_hash;
+                @local_ranges{keys %wt_hash} = (1) x scalar keys %wt_hash;
+            }
+        }
+    }
+
+    {
+        no warnings 'uninitialized';
+        my $total_tree_length = $tree_ref->get_total_tree_length;
+
+        #Phylogenetic endemism = sum for all nodes of:
+        #  (branch length/total tree length) / node range
+        $PE_WE_P = eval {$PE_WE / $total_tree_length};
+    }
+
+    # need the collated versions for multiple elements
+    if (scalar @$element_list_all > 1) {
+        $results{PE_WE}     = $PE_WE;
+        $results{PE_WTLIST} = \%wts;
+        my %nranges = %node_ranges{keys %wts};
+        $results{PE_RANGELIST} = \%nranges;
+    }
+    else {
+        my %nranges = %node_ranges{keys %{$results{PE_WTLIST}}};
+        $results{PE_RANGELIST} = \%nranges;
+    }
+
+    # need to set these
+    $results{PE_WE_P} = $PE_WE_P;
+    $results{PE_LOCAL_RANGELIST} = \%local_ranges;
+
+    $results_cache->{$node_name} = {%results{qw/PE_WE PE_WTLIST/}};
+
+    return wantarray ? %results : \%results;
+}
 
 1;
diff --git a/lib/Biodiverse/Randomise.pm b/lib/Biodiverse/Randomise.pm
index 22a12387e..0439ec988 100644
--- a/lib/Biodiverse/Randomise.pm
+++ b/lib/Biodiverse/Randomise.pm
@@ -973,12 +973,15 @@ sub compare_cluster_calcs_per_node {
     # Cloning via newick format clears all the params,
     # so avoids lingering basedata refs and the like
     require Biodiverse::ReadNexus;
-    
+
     my $read_nexus = Biodiverse::ReadNexus->new;
     $read_nexus->import_newick (data => $orig_analysis->to_newick);
     my @tree_array = $read_nexus->get_tree_array;
     my $clone = $tree_array[0];
     bless $clone, blessed ($orig_analysis);
+    # This is more direct but actually takes longer under profiling.
+    # Also does not clean out the previous lists.
+    # my $clone = $orig_analysis->clone_without_caches;
     $clone->rename (new_name => $orig_analysis->get_param ('NAME') . ' rand sp_calc' . $args{rand_iter});
 
     my %clone_analysis_args = %$analysis_args;
diff --git a/lib/Biodiverse/Spatial.pm b/lib/Biodiverse/Spatial.pm
index f9fb567ed..f84d8fe79 100644
--- a/lib/Biodiverse/Spatial.pm
+++ b/lib/Biodiverse/Spatial.pm
@@ -511,7 +511,16 @@ sub convert_comparisons_to_significances {
             list    => $result_list_name,
         );
 
+        # this will result in fewer greps inside the sig rank sub
+        my $base_ref_name = $list_name =~ s/.+>>//r;
+        my $base_list_ref = $self->get_list_ref(
+            element    => $element,
+            list       => $base_ref_name,
+            autovivify => 0,
+        );
+
         $self->get_sig_rank_from_comp_results (
+            base_list_ref    => $base_list_ref,
             comp_list_ref    => $comp_ref,
             results_list_ref => $result_list_ref,  # do it in-place
         );
diff --git a/lib/Biodiverse/Tree.pm b/lib/Biodiverse/Tree.pm
index 396013404..3d20b53ba 100644
--- a/lib/Biodiverse/Tree.pm
+++ b/lib/Biodiverse/Tree.pm
@@ -2,7 +2,7 @@ package Biodiverse::Tree;
 
 # Package to build and store trees.
 # includes clustering methods
-use 5.010;
+use 5.020;
 
 use Carp;
 use strict;
@@ -2251,7 +2251,12 @@ sub convert_comparisons_to_significances {
             );
         }
 
+        # this will result in fewer greps inside the sig rank sub
+        my $base_ref_name = $list_name =~ s/.+>>//r;
+        my $base_list_ref = $base_node->get_list_ref_aa( $base_ref_name );
+
         $self->get_sig_rank_from_comp_results(
+            base_list_ref    => $base_list_ref,
             comp_list_ref    => $comp_ref,
             results_list_ref => $result_list_ref,    # do it in-place
         );
@@ -2268,7 +2273,7 @@ sub convert_comparisons_to_zscores {
       if !defined $result_list_pfx;
 
     my $progress      = Biodiverse::Progress->new();
-    my $progress_text = "Calculating significances";
+    my $progress_text = "Calculating z-scores";
     $progress->update( $progress_text, 0 );
 
     # find all the relevant lists for this target name
@@ -3115,19 +3120,29 @@ sub clone_without_caches {
     # maybe should generate a new version but blessing and parenting might take longer
     my %saved_node_caches;
 
+    my %params = $self->get_params_hash;
+
     my $new_tree = do {
         # we have to delete the new tree's caches so avoid cloning them in the first place
         delete local $self->{_cache};
+        delete local $self->{PARAMS} or say STDERR 'woap?';
        # seem not to be able to use delete local on compound structure
        # or maybe it is the foreach loop even though postfix
        $saved_node_caches{$_} = delete $self->{TREE_BY_NAME}{$_}{_cache}
          foreach keys %{$self->{TREE_BY_NAME}};
        $self->clone;
     };
-    # reinstate the caches
+
+    # reinstate the caches and other settings on the original tree
+    # could be done as a defer block with a more recent perl
     $self->{TREE_BY_NAME}{$_}{_cache} = $saved_node_caches{$_}
       foreach keys %{$self->{TREE_BY_NAME}};
 
+    # assign the basic params
+    foreach my $param (qw /OUTSUFFIX OUTSUFFIX_YAML/) {
+        $new_tree->set_param($param => $params{$param});
+    }
+
     # reset all the total length values
     $new_tree->reset_total_length;
diff --git a/lib/Biodiverse/TreeNode.pm b/lib/Biodiverse/TreeNode.pm
index ef78c7455..db297d372 100644
--- a/lib/Biodiverse/TreeNode.pm
+++ b/lib/Biodiverse/TreeNode.pm
@@ -1559,15 +1559,12 @@ sub get_shared_ancestor {
 # get the list of hashes in the nodes
 sub get_hash_lists {
     my $self = shift;
-    my %args = @_;
-
-    my @list;
-    foreach my $tmp (keys %{$self}) {
-        next if $tmp =~ /^_/;  # skip the internals
-        push @list, $tmp if is_hashref($self->{$tmp});
-    }
-    return @list if wantarray;
-    return \@list;
+
+    my @list
+      = grep {$_ !~ /^_/ and is_hashref $self->{$_}} keys %$self;
+
+    return wantarray ? @list : \@list;
 }
@@ -1577,9 +1574,11 @@ sub get_hash_lists_below {
     my %hash_list;
     @hash_list{@list} = undef;
 
-    foreach my $child ($self->get_children) {
-        my $list_below = $child->get_hash_lists_below;
+    my @children = $self->get_children;
+    while (my $child = shift @children) {
+        my $list_below = $child->get_hash_lists;
         @hash_list{@$list_below} = undef;
+        push @children, $child->get_children;
     }
 
     return wantarray
@@ -2446,14 +2445,22 @@ sub add_to_lists {
             $self->{$list} = $values;
         }
         elsif (is_hashref($values)) {
-            $self->{$list} = {} if ! exists $self->{$list};
-            next if ! scalar keys %$values;
-            @{$self->{$list}}{keys %$values} = values %$values;  # add using a slice
-        }
+            if (!$self->{$list}) {
+                $self->{$list} = {%$values};
+            }
+            else {
+                next if !scalar keys %$values;
+                @{$self->{$list}}{keys %$values} = values %$values;
+            }
+        }
         elsif (is_arrayref($values)) {
-            $self->{$list} = [] if ! exists $self->{$list};
-            next if ! scalar @$values;
-            push @{$self->{$list}}, @$values;
+            if (!$self->{$list}) {
+                $self->{$list} = [@$values];
+            }
+            else {
+                next if !scalar @$values;
+                push @{$self->{$list}}, @$values;
+            }
         }
         else {
             croak "add_to_lists warning, no valid list ref passed\n";
diff --git a/t/26-Cluster2.t b/t/26-Cluster2.t
index 22b52830e..0100c5278 100644
--- a/t/26-Cluster2.t
+++ b/t/26-Cluster2.t
@@ -216,6 +216,57 @@ sub test_rw_turnover_mx {
 
 }
 
+sub test_cluster_node_calcs {
+
+    my %args = @_;
+    my $bd = $args{basedata_ref} || get_basedata_object_from_site_data(CELL_SIZES => [300000, 300000]);
+
+    my $prng_seed = $args{prng_seed} || $default_prng_seed;
+    my $tree_ref  = $args{tree_ref}  || get_tree_object_from_sample_data();
+
+    my $calcs = [qw/calc_pe calc_pd/];
+
+    my $cl1 = $bd->add_cluster_output (name => 'cl1');
+    $cl1->run_analysis (
+        prng_seed => $prng_seed,
+    );
+    $cl1->run_spatial_calculations (
+        tree_ref             => $tree_ref,
+        spatial_calculations => $calcs,
+        no_hierarchical_mode => 1,
+    );
+    my $cl2 = $bd->add_cluster_output (name => 'cl2');
+    $cl2->run_analysis (
+        prng_seed => $prng_seed,
+    );
+    $cl2->run_spatial_calculations (
+        tree_ref             => $tree_ref,
+        spatial_calculations => $calcs,
+        no_hierarchical_mode => 0,
+    );
+
+    my $node_hash1 = $cl1->get_node_hash;
+    my $node_hash2 = $cl2->get_node_hash;
+
+    is [sort keys %$node_hash1], [sort keys %$node_hash2], 'paranoia check: same node names';
+
+    my (%aggregate1, %aggregate2);
+    foreach my $node_name (sort keys %$node_hash1) {
+        my $node1 = $node_hash1->{$node_name};
+        my $node2 = $node_hash2->{$node_name};
+
+        foreach my $list_name (sort grep {$_ !~ /NODE_VALUES/} $node1->get_hash_lists) {
+            my $ref1 = $node1->get_list_ref_aa($list_name);
+            my $ref2 = $node2->get_list_ref_aa($list_name);
+            my $snapped1 = {map {$_ => sprintf "%.10f", $ref1->{$_}} keys %$ref1};
+            my $snapped2 = {map {$_ => sprintf "%.10f", $ref2->{$_}} keys %$ref2};
+            $aggregate1{$node_name}{$list_name} = $snapped1;
+            $aggregate2{$node_name}{$list_name} = $snapped2;
+        }
+    }
+    is \%aggregate2, \%aggregate1, 'same per-node index results with and without hierarchical mode';
+}
+
 
 __DATA__
 
 @@ CLUSTER_MINI_DATA
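
For context, here is a minimal usage sketch (not part of the patch) showing how the new no_hierarchical_mode flag is intended to be driven from calling code, following the calls exercised in t/26-Cluster2.t. The basedata construction and the $tree_ref setup are assumptions standing in for whatever data loading a caller already does; only add_cluster_output, run_analysis, run_spatial_calculations, no_hierarchical_mode and set_hierarchical_mode relate to the code changed above.

    use Biodiverse::BaseData;

    #  Assumed setup: a basedata object with groups and labels imported,
    #  and a Biodiverse::Tree object already loaded into $tree_ref.
    my $bd = Biodiverse::BaseData->new (
        NAME       => 'example',
        CELL_SIZES => [300000, 300000],
    );

    my $cl = $bd->add_cluster_output (name => 'cl_with_hier_mode');
    $cl->run_analysis (prng_seed => 2345);

    #  Hierarchical mode is on by default for per-node calculations,
    #  letting parent nodes re-use cached child results (e.g. PE weight
    #  lists and paths to the root node).  Pass no_hierarchical_mode => 1
    #  to force the per-group recalculation for every node.
    $cl->run_spatial_calculations (
        tree_ref             => $tree_ref,
        spatial_calculations => [qw /calc_pe calc_pd/],
        no_hierarchical_mode => 0,
    );

    #  The guard added in Indices.pm makes pairwise and hierarchical modes
    #  mutually exclusive, so enabling one while the other is set croaks:
    #  $indices_object->set_hierarchical_mode (1);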