Skip to content

Commit

Permalink
Merge pull request #939 from shawnlaffan/issue_924_cluster_calc_cleanup
Browse files Browse the repository at this point in the history
Cluster calcs per node: clean up existing results first

Fixes #924
  • Loading branch information
shawnlaffan authored Jun 2, 2024
2 parents d30a21b + 45ddc01 commit 046367f
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 7 deletions.
40 changes: 39 additions & 1 deletion lib/Biodiverse/Cluster.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,7 @@ sub override_cached_spatial_calculations_arg {
my %new_analysis_args = %$analysis_args;
$new_analysis_args{spatial_calculations} = $spatial_calculations;
$self->set_param (ANALYSIS_ARGS => \%new_analysis_args);
$self->set_param (SP_CALC_COUNT => 0);

return $spatial_calculations;
}
Expand Down Expand Up @@ -2757,6 +2758,8 @@ sub sp_calc {

my $indices_object = Biodiverse::Indices->new(BASEDATA_REF => $bd);

my $prev_sp_calc_count = $self->get_param('SP_CALC_COUNT');

if (! exists $args{calculations}) {
if (defined $args{analyses}) {
warn "Use of argument 'analyses' is deprecated from version 0.13\n"
Expand Down Expand Up @@ -2797,6 +2800,31 @@ sub sp_calc {
my $count = 0;
my $tree_name = $self->get_param ('NAME');

# If we are a re-run then we need to clean up first.
# But we only track this from version 4.99_003 so also
# go looking if the number of previous runs is undef.
my $existing_list_indices;
if ($prev_sp_calc_count || !defined $prev_sp_calc_count) {
$existing_list_indices = $self->find_list_indices_across_nodes(
indices_object => $indices_object
);
if (keys %$existing_list_indices) {
# and any randomisation lists
if (my @rand_names = $bd->get_randomisation_output_names) {
# should be refined
my $names = join '|', @rand_names;
my $re = qr/^(?:$names)>>/;
my @rand_lists = grep {$_ =~ $re} $self->get_list_names_below;
@$existing_list_indices{@rand_lists} = ();
}
# now make it an array as that is what is needed below
$existing_list_indices = [ keys %$existing_list_indices ];
}
else {
$existing_list_indices = undef;
}
}

print "[CLUSTER] Progress (% of $to_do nodes): ";
my $progress_bar = Biodiverse::Progress->new();
\my @node_refs = $self->get_node_refs;
Expand All @@ -2805,6 +2833,10 @@ sub sp_calc {
foreach my $node (rnkeysort {$_->get_depth} @node_refs) {
$count ++;

if ($existing_list_indices) {
$node->delete_lists(lists => $existing_list_indices);
}

$progress_bar->update (
"Cluster spatial analysis\n"
. "$tree_name\n(node $count / $to_do)",
Expand Down Expand Up @@ -2834,7 +2866,10 @@ sub sp_calc {
delete $sp_calc_values{$key};
}
}
$node->add_to_lists (SPATIAL_RESULTS => \%sp_calc_values);
$node->add_to_lists (
SPATIAL_RESULTS => \%sp_calc_values,
use_ref => 1, # ensure we override if recalculating
);
}

# run any global post_calcs
Expand All @@ -2844,6 +2879,9 @@ sub sp_calc {

$indices_object->set_hierarchical_mode(0);

$prev_sp_calc_count++;
$self->set_param(SP_CALC_COUNT => $prev_sp_calc_count);

return 1;
}

Expand Down
60 changes: 54 additions & 6 deletions t/26-Cluster2.t
Original file line number Diff line number Diff line change
Expand Up @@ -228,18 +228,14 @@ sub test_cluster_node_calcs {

my $cl1 = $bd->add_cluster_output (name => 'cl1');
$cl1->run_analysis (
prng_seed => $prng_seed,
);
$cl1->run_spatial_calculations (
prng_seed => $prng_seed,
tree_ref => $tree_ref,
spatial_calculations => $calcs,
no_hierarchical_mode => 1,
);
my $cl2 = $bd->add_cluster_output (name => 'cl2');
$cl2->run_analysis (
prng_seed => $prng_seed,
);
$cl2->run_spatial_calculations (
prng_seed => $prng_seed,
tree_ref => $tree_ref,
spatial_calculations => $calcs,
no_hierarchical_mode => 0,
Expand All @@ -266,6 +262,58 @@ sub test_cluster_node_calcs {
}
}
is \%aggregate2, \%aggregate1, 'same per-node index results with and without hierarchical mode';

# we need some rand lists to make sure they get cleaned up
for my $i (1..2) {
my $rand = $bd->add_randomisation_output(name => 'testing' . $i);
my $success = $rand->run_analysis(
function => 'rand_csr_by_group',
iterations => 1,
);
ok($success, "ran randomisation $i successfully");
}

# now test we clean up when re-run
my @lists1 = $cl1->get_list_names_below;
my (%sp_res_keys1, %sp_res_keys2);
foreach my $node ($cl1->get_node_refs) {
my $sp_res_ref = $node->get_list_ref_aa('SPATIAL_RESULTS');
@sp_res_keys1{keys %$sp_res_ref} = undef;
}

my $list_indices1 = $cl1->find_list_indices_across_nodes;
# diag join ' ', sort keys %$list_indices1;

$calcs = [qw/calc_pd/];
$cl1->run_analysis (
tree_ref => $tree_ref,
spatial_calculations => $calcs,
no_hierarchical_mode => 1,
);
my @lists2 = $cl1->get_list_names_below;

# diag join ' ', sort @lists1;
# diag join ' ', sort @lists2;
isnt [sort @lists2],
[sort @lists1],
"Node lists differ after recalculation of indices";

is ((scalar grep {/>>/} @lists1), 46, 'output contained randomisation lists');
is ((scalar grep {/>>/} @lists2), 0, 'no randomisation lists remaining after recalc');

foreach my $node ($cl1->get_node_refs) {
my $sp_res_ref = $node->get_list_ref_aa('SPATIAL_RESULTS');
@sp_res_keys2{keys %$sp_res_ref} = undef;
}

isnt [sort keys %sp_res_keys2],
[sort keys %sp_res_keys1],
"SPATIAL_RESULTS contents differ after recalculation of indices";

my $list_indices2 = $cl1->find_list_indices_across_nodes;
isnt $list_indices2, $list_indices1, "List indices across nodes not the same";
# diag join ' ', sort keys %$list_indices2;

}

__DATA__
Expand Down

0 comments on commit 046367f

Please sign in to comment.