From b211c5bbd96fbfbf4ab48d916170c17590cb4d05 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Sun, 2 Jun 2024 22:24:23 +1000 Subject: [PATCH 1/2] Cluster calcs per node: clean up existing results first Otherwise we can be left with results that are no longer needed. --- lib/Biodiverse/Cluster.pm | 40 ++++++++++++++++++++++++++- t/26-Cluster2.t | 57 ++++++++++++++++++++++++++++++++++----- 2 files changed, 90 insertions(+), 7 deletions(-) diff --git a/lib/Biodiverse/Cluster.pm b/lib/Biodiverse/Cluster.pm index d3e06b34e..62a6cb1a8 100644 --- a/lib/Biodiverse/Cluster.pm +++ b/lib/Biodiverse/Cluster.pm @@ -1893,6 +1893,7 @@ sub override_cached_spatial_calculations_arg { my %new_analysis_args = %$analysis_args; $new_analysis_args{spatial_calculations} = $spatial_calculations; $self->set_param (ANALYSIS_ARGS => \%new_analysis_args); + $self->set_param (SP_CALC_COUNT => 0); return $spatial_calculations; } @@ -2757,6 +2758,8 @@ sub sp_calc { my $indices_object = Biodiverse::Indices->new(BASEDATA_REF => $bd); + my $prev_sp_calc_count = $self->get_param('SP_CALC_COUNT'); + if (! exists $args{calculations}) { if (defined $args{analyses}) { warn "Use of argument 'analyses' is deprecated from version 0.13\n" @@ -2797,6 +2800,31 @@ sub sp_calc { my $count = 0; my $tree_name = $self->get_param ('NAME'); + # If we are a re-run then we need to clean up first. + # But we only track this from version 4.99_003 so also + # go looking if the number of previous runs is undef. + my $existing_list_indices; + if ($prev_sp_calc_count || !defined $prev_sp_calc_count) { + $existing_list_indices = $self->find_list_indices_across_nodes( + indices_object => $indices_object + ); + if (keys %$existing_list_indices) { + # and any randomisation lists + if (my @rand_names = $bd->get_randomisation_output_names) { + # should be refined + my $names = join '|', @rand_names; + my $re = qr/^(?:$names)>>/; + my @rand_lists = grep {$_ =~ $re} $self->get_list_names_below; + @$existing_list_indices{@rand_lists} = (); + } + # now make it an array as that is what is needed below + $existing_list_indices = [ keys %$existing_list_indices ]; + } + else { + $existing_list_indices = undef; + } + } + print "[CLUSTER] Progress (% of $to_do nodes): "; my $progress_bar = Biodiverse::Progress->new(); \my @node_refs = $self->get_node_refs; @@ -2805,6 +2833,10 @@ sub sp_calc { foreach my $node (rnkeysort {$_->get_depth} @node_refs) { $count ++; + if ($existing_list_indices) { + $node->delete_lists(lists => $existing_list_indices); + } + $progress_bar->update ( "Cluster spatial analysis\n" . "$tree_name\n(node $count / $to_do)", @@ -2834,7 +2866,10 @@ sub sp_calc { delete $sp_calc_values{$key}; } } - $node->add_to_lists (SPATIAL_RESULTS => \%sp_calc_values); + $node->add_to_lists ( + SPATIAL_RESULTS => \%sp_calc_values, + use_ref => 1, # ensure we override if recalculating + ); } # run any global post_calcs @@ -2844,6 +2879,9 @@ sub sp_calc { $indices_object->set_hierarchical_mode(0); + $prev_sp_calc_count++; + $self->set_param(SP_CALC_COUNT => $prev_sp_calc_count); + return 1; } diff --git a/t/26-Cluster2.t b/t/26-Cluster2.t index 052f24325..2ce687ab1 100644 --- a/t/26-Cluster2.t +++ b/t/26-Cluster2.t @@ -228,18 +228,14 @@ sub test_cluster_node_calcs { my $cl1 = $bd->add_cluster_output (name => 'cl1'); $cl1->run_analysis ( - prng_seed => $prng_seed, - ); - $cl1->run_spatial_calculations ( + prng_seed => $prng_seed, tree_ref => $tree_ref, spatial_calculations => $calcs, no_hierarchical_mode => 1, ); my $cl2 = $bd->add_cluster_output (name => 'cl2'); $cl2->run_analysis ( - prng_seed => $prng_seed, - ); - $cl2->run_spatial_calculations ( + prng_seed => $prng_seed, tree_ref => $tree_ref, spatial_calculations => $calcs, no_hierarchical_mode => 0, @@ -266,6 +262,55 @@ sub test_cluster_node_calcs { } } is \%aggregate2, \%aggregate1, 'same per-node index results with and without hierarchical mode'; + + # we need some rand lists to make sure they get cleaned up + for my $i (1..2) { + my $rand = $bd->add_randomisation_output(name => 'testing' . $i); + my $success = $rand->run_analysis( + function => 'rand_csr_by_group', + iterations => 1, + ); + ok($success, "ran randomisation $i successfully"); + } + + # now test we clean up when re-run + my @lists1 = $cl1->get_list_names_below; + my (%sp_res_keys1, %sp_res_keys2); + foreach my $node ($cl1->get_node_refs) { + my $sp_res_ref = $node->get_list_ref_aa('SPATIAL_RESULTS'); + @sp_res_keys1{keys %$sp_res_ref} = undef; + } + + my $list_indices1 = $cl1->find_list_indices_across_nodes; + # diag join ' ', sort keys %$list_indices1; + + $calcs = [qw/calc_pd/]; + $cl1->run_analysis ( + tree_ref => $tree_ref, + spatial_calculations => $calcs, + no_hierarchical_mode => 1, + ); + my @lists2 = $cl1->get_list_names_below; + + # diag join ' ', sort @lists1; + # diag join ' ', sort @lists2; + isnt [sort @lists2], + [sort @lists1], + "Node lists differ after recalculation of indices"; + + foreach my $node ($cl1->get_node_refs) { + my $sp_res_ref = $node->get_list_ref_aa('SPATIAL_RESULTS'); + @sp_res_keys2{keys %$sp_res_ref} = undef; + } + + isnt [sort keys %sp_res_keys2], + [sort keys %sp_res_keys1], + "SPATIAL_RESULTS contents differ after recalculation of indices"; + + my $list_indices2 = $cl1->find_list_indices_across_nodes; + isnt $list_indices2, $list_indices1, "List indices across nodes not the same"; + # diag join ' ', sort keys %$list_indices2; + } __DATA__ From 45ddc014842a17a486988197bda4a34dc2d86c3e Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 3 Jun 2024 07:50:17 +1000 Subject: [PATCH 2/2] Add more tests for cluster spatial recalc --- t/26-Cluster2.t | 3 +++ 1 file changed, 3 insertions(+) diff --git a/t/26-Cluster2.t b/t/26-Cluster2.t index 2ce687ab1..da73c375b 100644 --- a/t/26-Cluster2.t +++ b/t/26-Cluster2.t @@ -298,6 +298,9 @@ sub test_cluster_node_calcs { [sort @lists1], "Node lists differ after recalculation of indices"; + is ((scalar grep {/>>/} @lists1), 46, 'output contained randomisation lists'); + is ((scalar grep {/>>/} @lists2), 0, 'no randomisation lists remaining after recalc'); + foreach my $node ($cl1->get_node_refs) { my $sp_res_ref = $node->get_list_ref_aa('SPATIAL_RESULTS'); @sp_res_keys2{keys %$sp_res_ref} = undef;