Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Issue 919 calc abc audit #921

Merged
merged 7 commits into from
Feb 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions lib/Biodiverse/GUI/CellPopup.pm
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,7 @@ sub show_output_list {
$model->set($iter, 0, $key , 1, $val);
}
}
elsif (is_listref($list_ref)) {
elsif (is_arrayref($list_ref)) {
my $numeric = 1;
foreach my $key (@$list_ref) {
if (! looks_like_number ($key)) {
Expand All @@ -394,7 +394,7 @@ sub show_output_list {

my @keys = $numeric
? sort {$a <=> $b} @$list_ref
: natsort keys @$list_ref;
: natsort @$list_ref;

foreach my $elt (@keys) {
#print "[Cell popup] Adding output array entry $elt\n";
Expand Down
102 changes: 91 additions & 11 deletions lib/Biodiverse/Indices/Indices.pm
Original file line number Diff line number Diff line change
Expand Up @@ -1567,8 +1567,11 @@ sub get_metadata_calc_element_lists_used {

my %metadata = (
name => "Element lists",
description => "Lists of elements used in neighbour sets 1 and 2.\n"
. 'These form the basis for all the spatial calculations.',
description =>
"[DEPRECATED] Lists of elements used in neighbour sets 1 and 2.\n"
. 'These form the basis for all the spatial calculations. '
. 'The return types are inconsistent. New code should use '
. 'calc_element_lists_used_as_arrays',
type => 'Lists and Counts',
pre_calc => 'calc_abc',
uses_nbr_lists => 1, # how many sets of lists it must have
Expand All @@ -1595,13 +1598,90 @@ sub get_metadata_calc_element_lists_used {

sub calc_element_lists_used {
my $self = shift;
my %args = @_; # rest of args into a hash

my $set1 = $args{element_list1};
my $set2 = $args{element_list2};
my $set3 = $args{element_list_all};

# these two are hashes
if ($set1 && !is_hashref $set1) {
my $href = {};
@$href{@$set1} = (1) x @$set1;
$set1 = $href;
}
if ($set2 && !is_hashref $set2) {
my $href = {};
@$href{@$set2} = (1) x @$set2;
$set2 = $href;
}
# this is an array
if ($set3 && is_hashref $set3) {
$set2 = [keys %$set3];
}

my %results = (
EL_LIST_SET1 => $set1,
EL_LIST_SET2 => $set2,
EL_LIST_ALL => $set3,
);

return wantarray ? %results : \%results;
}

sub get_metadata_calc_element_lists_used_as_arrays {
my $self = shift;

my %metadata = (
name => "Element arrays",
description => "Arrays of elements used in neighbour sets 1 and 2.\n"
. 'These form the basis for all the spatial calculations.',
type => 'Lists and Counts',
pre_calc => 'calc_abc',
uses_nbr_lists => 1, # how many sets of lists it must have
indices => {
EL_ARRAY_SET1 => {
description => 'Array of elements in neighbour set 1',
type => 'list',
},
EL_ARRAY_SET2 => {
description => 'Array of elements in neighbour set 2',
uses_nbr_lists => 2,
type => 'list',
},
EL_ARRAY_ALL => {
description => 'Array of elements in both neighbour sets',
uses_nbr_lists => 2,
type => 'list',
},
},
);

return $metadata_class->new(\%metadata);
}

sub calc_element_lists_used_as_arrays {
my $self = shift;
my %args = @_; # rest of args into a hash

my $set1 = $args{element_list1};
my $set2 = $args{element_list2};
my $set3 = $args{element_list_all};

if ($set1 && is_hashref $set1) {
$set1 = [keys %$set1];
}
if ($set2 && is_hashref $set2) {
$set2 = [keys %$set2];
}
if ($set3 && is_hashref $set3) {
$set3 = [keys %$set3];
}

my %results = (
EL_LIST_SET1 => $args{element_list1},
EL_LIST_SET2 => $args{element_list2},
EL_LIST_ALL => $args{element_list_all},
EL_ARRAY_SET1 => $set1,
EL_ARRAY_SET2 => $set2,
EL_ARRAY_ALL => $set3,
);

return wantarray ? %results : \%results;
Expand Down Expand Up @@ -1728,8 +1808,8 @@ sub _calc_abc_one_element {
label_hash_all => \%label_list_master,
label_hash1 => \%label_hash1,
label_hash2 => {},
element_list1 => {$element => 1},
element_list2 => {},
element_list1 => [$element],
element_list2 => [],
element_list_all => [$element],
element_count1 => 1,
element_count2 => 0,
Expand Down Expand Up @@ -1814,8 +1894,8 @@ sub _calc_abc_pairwise_mode {
label_hash_all => \%label_list_master,
label_hash1 => \%label_hash1,
label_hash2 => \%label_hash2,
element_list1 => {$element1 => 1},
element_list2 => {$element2 => 1},
element_list1 => [$element1],
element_list2 => [$element2],
element_list_all => [$element1, $element2],
element_count1 => 1,
element_count2 => 1,
Expand Down Expand Up @@ -2000,8 +2080,8 @@ sub _calc_abc { # required by all the other indices, as it gets the labels in
label_hash_all => \%label_list_master,
label_hash1 => $label_list{1},
label_hash2 => $label_list{2},
element_list1 => $element_check{1},
element_list2 => $element_check{2},
element_list1 => [keys %{$element_check{1}}],
element_list2 => [keys %{$element_check{2}}],
element_list_all => [keys %element_check_master],
element_count1 => $element_count1,
element_count2 => $element_count2,
Expand Down
12 changes: 6 additions & 6 deletions lib/Biodiverse/Indices/Phylogenetic.pm
Original file line number Diff line number Diff line change
Expand Up @@ -2538,7 +2538,7 @@ sub calc_phylo_abc {
foreach my $list_name (qw/element_list1 element_list2/) {
$i++; # start at 1 so we match the numbered names
my $el_list = $args{$list_name} // next BY_LIST;
my @elements = keys %$el_list;
\my @elements = $el_list; # FIXME FIXME - use directly
my $have_cache = (@elements == 1 && $cache->{$elements[0]});
$nodes_in_path[$i]
= (@elements == 0)
Expand All @@ -2564,9 +2564,9 @@ sub calc_phylo_abc {
# simplify the calcs as we only need to find $aa
my $cache
= $self->get_cached_value_dor_set_default_href ('_calc_phylo_abc_pairwise_branch_sum_cache');
my $sum_i = $cache->{(keys %{$args{element_list1}})[0]} # use postfix deref?
my $sum_i = $cache->{$args{element_list1}[0]} # use postfix deref?
//= (sum values %list1) // 0;
my $sum_j = $cache->{(keys %{$args{element_list2}})[0]}
my $sum_j = $cache->{$args{element_list2}[0]}
//= (sum values %list2) // 0;
# save some looping, mainly when there are large differences in key counts
if (keys %list1 <= keys %list2) {
Expand Down Expand Up @@ -2634,15 +2634,15 @@ sub _calc_phylo_abc_lists {
%args,
labels => $label_hash1,
tree_ref => $tree,
el_list => [keys %{$args{element_list1}}],
el_list => $args{element_list1},
);

my $nodes_in_path2 = scalar %{$args{element_list2}}
my $nodes_in_path2 = @{$args{element_list2}}
? $self->get_path_lengths_to_root_node (
%args,
labels => $label_hash2,
tree_ref => $tree,
el_list => [keys %{$args{element_list2}}],
el_list => $args{element_list2},
)
: {};

Expand Down
1 change: 0 additions & 1 deletion lib/Biodiverse/Indices/PhylogeneticRelative.pm
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,6 @@ sub calc_phylo_rpd1 {
my $total_tree_length = $tree->get_total_tree_length;

my $pd_p_score = $args{PD_P};
my $pd_score = $args{PD_P};
my $label_hash = $args{PHYLO_LABELS_ON_TREE};
my $richness = scalar keys %$label_hash;

Expand Down
28 changes: 6 additions & 22 deletions lib/Biodiverse/Indices/RWTurnover.pm
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,12 @@ sub calc_rw_turnover {
# or inverse of ranges
my $cache
= $self->get_cached_value_dor_set_default_href ('_calc_phylo_rwt_pairwise_branch_sum_cache');
# use postfix idiom?
# ideally we would only be passed arrays, but see issue #919
my $key1 = is_hashref ($args{element_list1})
? ((keys %{$args{element_list1}})[0])
: (${$args{element_list1}}[0]);
my $key2 = is_hashref ($args{element_list2})
? ((keys %{$args{element_list2}})[0])
: (${$args{element_list2} //[]}[0]);
# Could use a reduce call to collapse the "sum map {} @list" idiom,
# thus avoiding a list generation. These are only run once per group,
# though, so it might not matter.
my $sum_i = $cache->{$key1}
my $sum_i = $cache->{$args{element_list1}[0]}
//= (sum map {1 / $_} @ranges{keys %list1}) // 0;
my $sum_j = $cache->{$key2}
my $sum_j = $cache->{$args{element_list2}[0]}
//= (sum map {1 / $_} @ranges{keys %list2}) // 0;
# save some looping, mainly when there are large differences in key counts
if (keys %list1 <= keys %list2) {
Expand Down Expand Up @@ -173,20 +165,12 @@ sub calc_phylo_rw_turnover {
# simplify the calcs as we only need to find $aa
my $cache
= $self->get_cached_value_dor_set_default_href ('_calc_phylo_rwt_pairwise_branch_sum_cache');
# use postfix idiom?
# ideally we would only be passed arrays, but see issue #919
my $key1 = is_hashref ($args{element_list1})
? ((keys %{$args{element_list1}})[0])
: (${$args{element_list1}}[0]);
my $key2 = is_hashref ($args{element_list2})
? ((keys %{$args{element_list2}})[0])
: (${$args{element_list2} //[]}[0]);
# Could use a reduce call to collapse the "sum map {} @list" idiom,
# thus avoiding a list generation. These are only run once per group,
# though, so it might not matter.
my $sum_i = $cache->{$key1}
my $sum_i = $cache->{$args{element_list1}[0]}
//= (sum values %list1) // 0;
my $sum_j = $cache->{$key2}
my $sum_j = $cache->{$args{element_list2}[0]}
//= (sum values %list2) // 0;
# save some looping, mainly when there are large differences in key counts
if (keys %list1 <= keys %list2) {
Expand Down Expand Up @@ -333,7 +317,7 @@ sub get_metadata__calc_pe_lists_per_element_set {
set_path_length_cache_by_group_flag
get_inverse_range_weighted_path_lengths
/],
pre_calc => ['calc_abc'], # don't need calc_abc2 as we don't use its counts
pre_calc => [], # don't need calc_abc2 as we don't use its counts
uses_nbr_lists => 1, # how many lists it must have
required_args => {'tree_ref' => 1},
);
Expand All @@ -358,7 +342,7 @@ sub _calc_pe_lists_per_element_set {
foreach my $list_name (qw /element_list1 element_list2/) {
$i++; # start at 1 so we match the numbered names
my $el_list = $args{$list_name} // next BY_LIST;
my @elements = keys %$el_list;
\my @elements = $el_list; # FIXME
my $have_cache = (@elements == 1 && $cache->{$elements[0]});
$results[$i]
= $have_cache
Expand Down
51 changes: 32 additions & 19 deletions t/24-Indices-lists-and-counts.t
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ sub test_main {
calc_nonempty_elements_used
calc_elements_used
calc_element_lists_used
calc_element_lists_used_as_arrays
calc_abc_counts
calc_d
calc_local_range_lists
Expand All @@ -59,7 +60,7 @@ sub test_main {
/],
calc_topic_to_test => 'Lists and Counts',
sort_array_lists => 1,
#generate_result_sets => 1,
# generate_result_sets => 1,
);
}

Expand Down Expand Up @@ -116,7 +117,8 @@ done_testing;
__DATA__

@@ RESULTS_2_NBR_LISTS
{ ABC2_LABELS_ALL => {
{
ABC2_LABELS_ALL => {
'Genus:sp1' => 2,
'Genus:sp10' => 1,
'Genus:sp11' => 2,
Expand Down Expand Up @@ -277,19 +279,29 @@ __DATA__
ABC_B => 0,
ABC_C => 12,
ABC_D => 17,
EL_COUNT_ALL => 5,
EL_COUNT_SET1 => 1,
EL_COUNT_SET2 => 4,
EL_ARRAY_ALL => [
'3350000:750000', '3250000:850000',
'3350000:950000', '3450000:850000',
'3350000:850000'
],
EL_ARRAY_SET1 => ['3350000:850000'],
EL_ARRAY_SET2 => [
'3350000:950000', '3350000:750000',
'3450000:850000', '3250000:850000'
],
EL_COUNT_ALL => 5,
EL_COUNT_NONEMPTY_ALL => 5,
EL_COUNT_NONEMPTY_SET1 => 1,
EL_COUNT_NONEMPTY_SET2 => 4,
EL_LIST_ALL => [
'3250000:850000', '3350000:850000',
'3350000:750000', '3350000:950000',
'3450000:850000'
EL_COUNT_SET1 => 1,
EL_COUNT_SET2 => 4,
EL_LIST_ALL => [
'3350000:750000', '3250000:850000',
'3350000:950000', '3450000:850000',
'3350000:850000'
],
EL_LIST_SET1 => { '3350000:850000' => 1 },
EL_LIST_SET2 => {
EL_LIST_SET1 => { '3350000:850000' => 1 },
EL_LIST_SET2 => {
'3250000:850000' => 1,
'3350000:750000' => 1,
'3350000:950000' => 1,
Expand Down Expand Up @@ -317,7 +329,7 @@ __DATA__
RICHNESS_ALL => 14,
RICHNESS_SET1 => 2,
RICHNESS_SET2 => 14
}
};


@@ RESULTS_1_NBR_LISTS
Expand Down Expand Up @@ -356,15 +368,16 @@ __DATA__
Q095 => 4,
Q100 => 4
},
ABC3_SD_SET1 => '1.4142135623731',
ABC3_SUM_SET1 => 6,
ABC_D => 29,
EL_COUNT_SET1 => 1,
EL_COUNT_ALL => 1,
ABC3_SD_SET1 => '1.4142135623731',
ABC3_SUM_SET1 => 6,
ABC_D => 29,
EL_ARRAY_SET1 => ['3350000:850000'],
EL_COUNT_ALL => 1,
EL_COUNT_NONEMPTY_ALL => 1,
EL_COUNT_NONEMPTY_SET1 => 1,
EL_LIST_SET1 => { '3350000:850000' => 1 },
LABEL_COUNT_RANK_PCT => {
EL_COUNT_SET1 => 1,
EL_LIST_SET1 => { '3350000:850000' => 1 },
LABEL_COUNT_RANK_PCT => {
'Genus:sp20' => undef,
'Genus:sp26' => undef
},
Expand Down
Loading