Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Cluster calcs per node: clean up existing results first #939

Merged
merged 2 commits into from
Jun 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 39 additions & 1 deletion lib/Biodiverse/Cluster.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1893,6 +1893,7 @@ sub override_cached_spatial_calculations_arg {
my %new_analysis_args = %$analysis_args;
$new_analysis_args{spatial_calculations} = $spatial_calculations;
$self->set_param (ANALYSIS_ARGS => \%new_analysis_args);
$self->set_param (SP_CALC_COUNT => 0);

return $spatial_calculations;
}
Expand Down Expand Up @@ -2757,6 +2758,8 @@ sub sp_calc {

my $indices_object = Biodiverse::Indices->new(BASEDATA_REF => $bd);

my $prev_sp_calc_count = $self->get_param('SP_CALC_COUNT');

if (! exists $args{calculations}) {
if (defined $args{analyses}) {
warn "Use of argument 'analyses' is deprecated from version 0.13\n"
Expand Down Expand Up @@ -2797,6 +2800,31 @@ sub sp_calc {
my $count = 0;
my $tree_name = $self->get_param ('NAME');

# If we are a re-run then we need to clean up first.
# But we only track this from version 4.99_003 so also
# go looking if the number of previous runs is undef.
my $existing_list_indices;
if ($prev_sp_calc_count || !defined $prev_sp_calc_count) {
$existing_list_indices = $self->find_list_indices_across_nodes(
indices_object => $indices_object
);
if (keys %$existing_list_indices) {
# and any randomisation lists
if (my @rand_names = $bd->get_randomisation_output_names) {
# should be refined
my $names = join '|', @rand_names;
my $re = qr/^(?:$names)>>/;
my @rand_lists = grep {$_ =~ $re} $self->get_list_names_below;
@$existing_list_indices{@rand_lists} = ();
}
# now make it an array as that is what is needed below
$existing_list_indices = [ keys %$existing_list_indices ];
}
else {
$existing_list_indices = undef;
}
}

print "[CLUSTER] Progress (% of $to_do nodes): ";
my $progress_bar = Biodiverse::Progress->new();
\my @node_refs = $self->get_node_refs;
Expand All @@ -2805,6 +2833,10 @@ sub sp_calc {
foreach my $node (rnkeysort {$_->get_depth} @node_refs) {
$count ++;

if ($existing_list_indices) {
$node->delete_lists(lists => $existing_list_indices);
}

$progress_bar->update (
"Cluster spatial analysis\n"
. "$tree_name\n(node $count / $to_do)",
Expand Down Expand Up @@ -2834,7 +2866,10 @@ sub sp_calc {
delete $sp_calc_values{$key};
}
}
$node->add_to_lists (SPATIAL_RESULTS => \%sp_calc_values);
$node->add_to_lists (
SPATIAL_RESULTS => \%sp_calc_values,
use_ref => 1, # ensure we override if recalculating
);
}

# run any global post_calcs
Expand All @@ -2844,6 +2879,9 @@ sub sp_calc {

$indices_object->set_hierarchical_mode(0);

$prev_sp_calc_count++;
$self->set_param(SP_CALC_COUNT => $prev_sp_calc_count);

return 1;
}

Expand Down
60 changes: 54 additions & 6 deletions t/26-Cluster2.t
Original file line number Diff line number Diff line change
Expand Up @@ -228,18 +228,14 @@ sub test_cluster_node_calcs {

my $cl1 = $bd->add_cluster_output (name => 'cl1');
$cl1->run_analysis (
prng_seed => $prng_seed,
);
$cl1->run_spatial_calculations (
prng_seed => $prng_seed,
tree_ref => $tree_ref,
spatial_calculations => $calcs,
no_hierarchical_mode => 1,
);
my $cl2 = $bd->add_cluster_output (name => 'cl2');
$cl2->run_analysis (
prng_seed => $prng_seed,
);
$cl2->run_spatial_calculations (
prng_seed => $prng_seed,
tree_ref => $tree_ref,
spatial_calculations => $calcs,
no_hierarchical_mode => 0,
Expand All @@ -266,6 +262,58 @@ sub test_cluster_node_calcs {
}
}
is \%aggregate2, \%aggregate1, 'same per-node index results with and without hierarchical mode';

# we need some rand lists to make sure they get cleaned up
for my $i (1..2) {
my $rand = $bd->add_randomisation_output(name => 'testing' . $i);
my $success = $rand->run_analysis(
function => 'rand_csr_by_group',
iterations => 1,
);
ok($success, "ran randomisation $i successfully");
}

# now test we clean up when re-run
my @lists1 = $cl1->get_list_names_below;
my (%sp_res_keys1, %sp_res_keys2);
foreach my $node ($cl1->get_node_refs) {
my $sp_res_ref = $node->get_list_ref_aa('SPATIAL_RESULTS');
@sp_res_keys1{keys %$sp_res_ref} = undef;
}

my $list_indices1 = $cl1->find_list_indices_across_nodes;
# diag join ' ', sort keys %$list_indices1;

$calcs = [qw/calc_pd/];
$cl1->run_analysis (
tree_ref => $tree_ref,
spatial_calculations => $calcs,
no_hierarchical_mode => 1,
);
my @lists2 = $cl1->get_list_names_below;

# diag join ' ', sort @lists1;
# diag join ' ', sort @lists2;
isnt [sort @lists2],
[sort @lists1],
"Node lists differ after recalculation of indices";

is ((scalar grep {/>>/} @lists1), 46, 'output contained randomisation lists');
is ((scalar grep {/>>/} @lists2), 0, 'no randomisation lists remaining after recalc');

foreach my $node ($cl1->get_node_refs) {
my $sp_res_ref = $node->get_list_ref_aa('SPATIAL_RESULTS');
@sp_res_keys2{keys %$sp_res_ref} = undef;
}

isnt [sort keys %sp_res_keys2],
[sort keys %sp_res_keys1],
"SPATIAL_RESULTS contents differ after recalculation of indices";

my $list_indices2 = $cl1->find_list_indices_across_nodes;
isnt $list_indices2, $list_indices1, "List indices across nodes not the same";
# diag join ' ', sort keys %$list_indices2;

}

__DATA__
Expand Down
Loading