Skip to content

Commit

Permalink
Randomisations: Add neighbourhood swapping to curveball
Browse files Browse the repository at this point in the history
  • Loading branch information
shawnlaffan committed Nov 6, 2024
1 parent 58e6d5a commit cd836a9
Show file tree
Hide file tree
Showing 3 changed files with 98 additions and 13 deletions.
6 changes: 4 additions & 2 deletions lib/Biodiverse/Randomise.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1387,6 +1387,7 @@ sub get_spatial_output_for_label_allocation {
my ($self, %args) = @_;

my $sp_conditions = $args{spatial_conditions_for_label_allocation};
my $param_name = $args{param_name} // 'SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION';

return if !defined $sp_conditions;

Expand All @@ -1409,7 +1410,7 @@ sub get_spatial_output_for_label_allocation {

return if !length $sp_check_text; # all we had was whitespace and comments

my $sp = $self->get_param('SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION');
my $sp = $self->get_param($param_name);

return $sp if $sp;

Expand All @@ -1423,6 +1424,7 @@ sub get_spatial_output_for_label_allocation {
#definition_query => $def_query, # do we want a def query for this? Prob not.
calculations => [],
override_valid_analysis_check => 1,
elements_to_calc => $args{elements_to_calc},
calc_only_elements_to_calc => 1, # really need to rename this undocumented arg
);
};
Expand All @@ -1432,7 +1434,7 @@ sub get_spatial_output_for_label_allocation {

croak $e if $e;

$self->set_param(SPATIAL_OUTPUT_FOR_LABEL_ALLOCATION => $sp);
$self->set_param($param_name => $sp);

return $sp;
}
Expand Down
64 changes: 58 additions & 6 deletions lib/Biodiverse/Randomise/CurveBall.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ use 5.022;

our $VERSION = '4.99_002';

use Carp qw /croak/;

use experimental 'refaliasing';
use experimental 'declared_refs';
no warnings 'experimental::refaliasing';
Expand Down Expand Up @@ -145,6 +147,44 @@ END_PROGRESS_TEXT

$progress_bar->reset;

my (%sp_swap_list, @gps_with_nbrs);
if (my $sp_conditions = $args{spatial_condition_for_swap_pairs}) {
my $sp_swapper = $self->get_spatial_output_for_label_allocation (
%args,
spatial_conditions_for_label_allocation => $sp_conditions,
param_name => 'SPATIAL_OUTPUT_FOR_SWAP_CANDIDATES',
elements_to_calc => \@sorted_groups, # excludes empty and full groups
);
if ($sp_swapper) {
my $spatial_conditions_arr = $sp_swapper->get_spatial_conditions;
my $sp_cond_obj = $spatial_conditions_arr->[0];
my $result_type = $sp_cond_obj->get_result_type;
if ($result_type eq 'always_true') {
say "[Randomise] spatial condition always_true, reverting to non-spatial allocation";
}
elsif ($result_type =~ /^always_false|self_only$/) {
croak "Spatial condition means it is impossible for groups to have neighbours, "
. "so cannot swap labels with neighbours"
if !@gps_with_nbrs;
}
else {
foreach my $element ($sp_swapper->get_element_list) {
my $nbrs = $sp_swapper->get_list_ref_aa ($element, '_NBR_SET1') // [];
# prefilter the focal group
my @filtered = sort grep {$_ ne $element} @$nbrs;
next if !@filtered;
$sp_swap_list{$element} = \@filtered;
}
@gps_with_nbrs = sort keys %sp_swap_list;
my $n_gps_w_nbrs = @gps_with_nbrs;
say "[Randomise] $n_gps_w_nbrs of $n_groups groups have swappable neighbours";
croak "No groups have neighbours, cannot swap labels with neighbours"
if !@gps_with_nbrs;
}
}
}
my $use_spatial_swap = !!%sp_swap_list;

# Basic algorithm:
# pick two different groups at random
# swap as many labels as possible
Expand All @@ -169,12 +209,24 @@ END_PROGRESS_TEXT
) {
$attempts++;

my $group1 = $sorted_groups[int $rand->rand ($n_groups)];
my $group2 = $sorted_groups[int $rand->rand ($n_groups)];
while ($group1 eq $group2) {
# handle pathological case of only one group
last MAIN_ITER if $n_groups == 1;
$group2 = $sorted_groups[int $rand->rand ($n_groups)];
# handle pathological case of only one group
last MAIN_ITER if $n_groups == 1;

my $group1; ;
my $group2;
if ($use_spatial_swap) {
$group1 = $gps_with_nbrs[int $rand->rand (scalar @gps_with_nbrs)];
my $n = scalar @{$sp_swap_list{$group1}};
next MAIN_ITER if !$n;
# we have already filtered group1 from its list
$group2 = $sp_swap_list{$group1}[int $rand->rand($n)]
}
else {
$group1 = $sorted_groups[int $rand->rand ($n_groups)];
$group2 = $sorted_groups[int $rand->rand($n_groups)];
while ($group1 eq $group2) { # keep trying - a bit wasteful but should be rare
$group2 = $sorted_groups[int $rand->rand($n_groups)];
}
}

my \%labels1 = $lb_hash{$group1};
Expand Down
41 changes: 36 additions & 5 deletions t/28-Randomisation.t
Original file line number Diff line number Diff line change
Expand Up @@ -125,15 +125,45 @@ sub test_rand_independent_swaps_modified {

sub test_rand_curveball {
test_rand_structured_richness_same (
'rand_curveball', swap_count => 1000,
'rand_curveball',
);
}

sub test_rand_curveball_sp_cond {
my $rand_bd_array = test_rand_structured_richness_same (
'rand_curveball',
spatial_condition_for_swap_pairs => 'sp_circle(radius => 100000)',
resolution => 100000,
log_suffix => ' with spatial condition'
);

# The site data have these groups as an isolated set. There should only be swapping among them
# so the total counts will be constant across realisations.
my $expected = {
'Genus:sp1' => 4,
'Genus:sp2' => 10,
'Genus:sp3' => 4,
};
my $i;
foreach my $bd (@$rand_bd_array) {
$i++;
my %collated_labels;
foreach my $gp (qw /3250000:3050000 3150000:2950000 3250000:2950000 3250000:2850000/) {
my $labels = $bd->get_labels_in_group_as_hash (group => $gp);
foreach my $label (keys %$labels) {
$collated_labels{$label} += $labels->{$label};
}
}
is \%collated_labels, $expected, "Curveball spatial: labels and counts for isolated subregion, rand bd $i";
}
}

sub test_rand_structured_richness_same {
my ($rand_function, %args) = @_;
$rand_function //= 'rand_structured';
my $log_suffix = delete $args{log_suffix} // '';

my $c = 100000;
my $c = delete $args{resolution} // 100000;
my $bd = get_basedata_object_from_site_data(CELL_SIZES => [$c, $c]);

# add some empty groups - need enough to trigger issue #543
Expand All @@ -152,6 +182,7 @@ sub test_rand_structured_richness_same {
$bd->add_element(group => $gp, label => $label);
}
}
$bd->build_spatial_index (resolutions => [$c, $c]);

# name is short for test_rand_calc_per_node_uses_orig_bd
my $sp = $bd->add_spatial_output (name => 'sp');
Expand Down Expand Up @@ -191,7 +222,7 @@ sub test_rand_structured_richness_same {
$obs_richness{$group} //= $bd->get_richness_aa ($group) // 0;
$rand_richness{$group} = $rand_bd->get_richness_aa ($group) // 0;
}
is \%rand_richness, \%obs_richness, "Richness scores match, $rand_function";
is \%rand_richness, \%obs_richness, "Richness scores match, $rand_function $log_suffix";
}

foreach my $rand_bd (@$rand_bd_array) {
Expand All @@ -200,10 +231,10 @@ sub test_rand_structured_richness_same {
$obs_range{$label} //= $bd->get_range (element => $label);
$rand_range{$label} = $rand_bd->get_range (element => $label);
}
is \%obs_range, \%rand_range, "Ranges match, $rand_function";
is \%obs_range, \%rand_range, "Ranges match, $rand_function $log_suffix";
}

return;
return $rand_bd_array;
}


Expand Down

0 comments on commit cd836a9

Please sign in to comment.