From 4537f52ffa96d3800018d02d9cb9d55c9ed917df Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 14:20:56 +1100 Subject: [PATCH 01/22] optimise RichnessEstimation.pm Faster::Maths allows a substantial speed-up (7-30%) of arithmetic code, of which there is a lot in this module. It currently segfaults in some circumstances so lexically disable it as needed. This has been reported to its RT queue. Also avoid some looping and memory usage in the ACE and ICE variance calculations. --- lib/Biodiverse/Indices/RichnessEstimation.pm | 96 ++++++++++++-------- 1 file changed, 58 insertions(+), 38 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index cc4de5aa1..409190abb 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -9,6 +9,12 @@ use Carp; use List::Util qw /max min sum/; use experimental qw /refaliasing/; +# segfaults wherever there is a while-each, map or postfix values loop, +# and possibly other conditions. These are all localised inside blocks +# where it is lexically disabled. +# see https://rt.cpan.org/Public/Dist/Display.html?Name=Faster-Maths +use Faster::Maths; + our $VERSION = '4.99_002'; my $metadata_class = 'Biodiverse::Metadata::Indices'; @@ -116,7 +122,7 @@ sub calc_chao1 { $chao_partial *= $cn1; my $chao = $richness + $chao_partial; - + if ($variance_uses_eq7) { $variance = $cn1 * ($f1 * ($f1 - 1)) / 2 + $cn2 * $f1 * (2 * $f1 - 1)**2 / 4 @@ -125,15 +131,17 @@ sub calc_chao1 { } elsif ($variance_uses_eq8) { my %sums; - foreach my $freq (values %$label_hash) { - $sums{$freq} ++; + { + no Faster::Maths; + $sums{$_}++ for values %$label_hash; } + my ($part1, $part2); foreach my $i (keys %sums) { my $f = $sums{$i}; #say "$i $f"; $part1 += $f * (exp (-$i) - exp (-2 * $i)); - $part2 += $i * exp (-$i) * $f; + $part2 += $i * exp (-$i) * $f; } $variance = $n ? $part1 - $part2 ** 2 / $n : 0; $chao_formula = 0; @@ -283,11 +291,13 @@ sub calc_chao2 { } elsif ($variance_uses_eq12) { # same structure as eq8 - could refactor my %sums; - foreach my $freq (values %$label_hash) { - $sums{$freq} ++; + { + no Faster::Maths; + $sums{$_}++ for values %$label_hash; } my ($part1, $part2); - while (my ($i, $Q) = each %sums) { + foreach my $i (keys %sums) { + my $Q = $sums{$i}; $part1 += $Q * (exp (-$i) - exp (-2 * $i)); $part2 += $i * exp (-$i) * $Q; } @@ -336,7 +346,7 @@ sub calc_chao2 { sub _calc_chao_confidence_intervals { my $self = shift; my %args = @_; - + my $f1 = $args{F1}; my $f2 = $args{F2}; my $chao = $args{chao_score}; @@ -359,13 +369,14 @@ sub _calc_chao_confidence_intervals { else { my $P = 0; my %sums; - foreach my $freq (values %$label_hash) { - $sums{$freq} ++; + { + no Faster::Maths; + $sums{$_}++ foreach values %$label_hash; } # set CIs to undefined if we only have singletons/uniques if ($richness && ! (scalar keys %sums == 1 && exists $sums{1})) { - while (my ($f, $count) = each %sums) { - $P += $count * exp (-$f); + foreach my $f (keys %sums) { + $P += $sums{$f} * exp (-$f); } $P /= $richness; my $part1 = $richness / (1 - $P); @@ -666,19 +677,18 @@ sub _get_ace_variance { $diff{$i} = $self->_get_ace_differential (%args, f => $i); foreach my $j (@sorted) { $cov{$i}{$j} - //= $cov{$j}{$i} //= $self->_get_ace_ice_cov (%args, i => $i, j => $j); last if $i == $j; } } my $var_ace = 0; - foreach my $i (keys %$freq_counts) { - foreach my $j (keys %$freq_counts) { - my $partial - = $diff{$i} * $diff{$j} * $cov{$i}{$j}; - $var_ace += $partial; + foreach my $i (@sorted) { + foreach my $j (@sorted) { + last if $i == $j; + $var_ace += 2 * $diff{$i} * $diff{$j} * $cov{$i}{$j}; } + $var_ace += ($diff{$i} ** 2) * $cov{$i}{$i}; } $var_ace ||= undef; @@ -699,19 +709,19 @@ sub _get_ice_variance { $diff{$i} = $self->_get_ice_differential (%args, f => $i); foreach my $j (@sorted) { $cov{$i}{$j} - //= $cov{$j}{$i} //= $self->_get_ace_ice_cov (%args, i => $i, j => $j); last if $i == $j; } } + # should fold this into the loop above my $var_ice = 0; - foreach my $i (keys %$freq_counts) { - foreach my $j (keys %$freq_counts) { - my $partial - = $diff{$i} * $diff{$j} * $cov{$i}{$j}; - $var_ice += $partial; + foreach my $i (@sorted) { + foreach my $j (@sorted) { + last if $i == $j; + $var_ice += 2 * $diff{$i} * $diff{$j} * $cov{$i}{$j}; } + $var_ice += ($diff{$i} ** 2) * $cov{$i}{$i}; } $var_ice ||= undef; @@ -751,14 +761,18 @@ sub _get_ice_differential { my @u = (1..$k); - $n_infreq //= - sum - map $_ * $freq_counts->{$_}, - grep $_ < $k, - keys %$freq_counts; + my $si; - my $si = sum map (($_ * ($_-1) * ($Q->{$_} // 0)), @u); + { + no Faster::Maths; + $n_infreq //= + sum + map $_ * $freq_counts->{$_}, + grep $_ < $k, + keys %$freq_counts; + $si = sum map(($_ * ($_ - 1) * ($Q->{$_} // 0)), @u); + } my ($Q1, $Q2) = @$Q{1,2}; $Q1 //= 0; $Q2 //= 0; @@ -871,14 +885,17 @@ sub _get_ace_differential { my @u = (1..$k); - $n_rare //= - sum - map $_ * $F->{$_}, - grep $_ <= $k, - keys %$F; - - my $si = sum map (($_ * ($_-1) * ($F->{$_} // 0)), @u); + my $si; + { + no Faster::Maths; + $n_rare //= + sum + map $_ * $F->{$_}, + grep $_ <= $k, + keys %$F; + $si = sum map(($_ * ($_ - 1) * ($F->{$_} // 0)), @u); + } my $f1 = $F->{1}; my $d; @@ -1006,7 +1023,10 @@ sub calc_hurlbert_es { my $label_hash = $args{label_hash_all}; my $N; - $N += $_ for values %$label_hash; + { + no Faster::Maths; + $N += $_ for values %$label_hash; + } \my @lgamma_arr = $self->_get_lgamma_arr (max_n => $N); From e626399c4beafcca5166c1041a27bb3d827bb6d0 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 14:30:56 +1100 Subject: [PATCH 02/22] RichnessEstimation.pm: fold two loops together No need to iterate over the same sets twice. --- lib/Biodiverse/Indices/RichnessEstimation.pm | 22 +++++++------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 409190abb..12128d247 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -704,29 +704,23 @@ sub _get_ice_variance { # precalculate the differentials and covariances my (%diff, %cov); + my $var = 0; + my @sorted = sort {$a <=> $b} keys %$freq_counts; foreach my $i (@sorted) { $diff{$i} = $self->_get_ice_differential (%args, f => $i); + my $cov; foreach my $j (@sorted) { - $cov{$i}{$j} - //= $self->_get_ace_ice_cov (%args, i => $i, j => $j); - last if $i == $j; - } - } - - # should fold this into the loop above - my $var_ice = 0; - foreach my $i (@sorted) { - foreach my $j (@sorted) { + $cov = $self->_get_ace_ice_cov (%args, i => $i, j => $j); last if $i == $j; - $var_ice += 2 * $diff{$i} * $diff{$j} * $cov{$i}{$j}; + $var += 2 * $diff{$i} * $diff{$j} * $cov; } - $var_ice += ($diff{$i} ** 2) * $cov{$i}{$i}; + $var += ($diff{$i} ** 2) * $cov; } - $var_ice ||= undef; + $var ||= undef; - return $var_ice; + return $var; } # common to ACE and ICE From 83d2016a6acd91832366070a2066083e36ef1240 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 14:39:54 +1100 Subject: [PATCH 03/22] RichnessEstimation.pm: unify the ACE and ICE variance methods The former is now a wrapper around the latter, the difference being which differential method is called. --- lib/Biodiverse/Indices/RichnessEstimation.pm | 41 +++++--------------- 1 file changed, 10 insertions(+), 31 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 12128d247..325bb8f64 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -662,38 +662,13 @@ sub calc_ice { return wantarray ? %results : \%results; } -# almost identical to _get_ice_variance but integrating the two -# would prob result in more complex code +# almost identical to _get_ice_variance so we are just a wrapper +# that calls a different internal method sub _get_ace_variance { my $self = shift; my %args = @_; - my $freq_counts = $args{freq_counts}; - - # precalculate the differentials and covariances - my (%diff, %cov); - my @sorted = sort {$a <=> $b} keys %$freq_counts; - foreach my $i (@sorted) { - $diff{$i} = $self->_get_ace_differential (%args, f => $i); - foreach my $j (@sorted) { - $cov{$i}{$j} - //= $self->_get_ace_ice_cov (%args, i => $i, j => $j); - last if $i == $j; - } - } - - my $var_ace = 0; - foreach my $i (@sorted) { - foreach my $j (@sorted) { - last if $i == $j; - $var_ace += 2 * $diff{$i} * $diff{$j} * $cov{$i}{$j}; - } - $var_ace += ($diff{$i} ** 2) * $cov{$i}{$i}; - } - - $var_ace ||= undef; - - return $var_ace; + return $self->_get_ice_variance(%args, for_ace => 1); } sub _get_ice_variance { @@ -702,13 +677,17 @@ sub _get_ice_variance { my $freq_counts = $args{freq_counts}; - # precalculate the differentials and covariances - my (%diff, %cov); + my $diff_method = $args{for_ace} + ? '_get_ace_differential' + : '_get_ice_differential'; + + # work through the differentials and covariances + my %diff; my $var = 0; my @sorted = sort {$a <=> $b} keys %$freq_counts; foreach my $i (@sorted) { - $diff{$i} = $self->_get_ice_differential (%args, f => $i); + $diff{$i} = $self->$diff_method (%args, f => $i); my $cov; foreach my $j (@sorted) { $cov = $self->_get_ace_ice_cov (%args, i => $i, j => $j); From 5a49b7de1304536c247d303d70a2e0b73a980555 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 14:50:44 +1100 Subject: [PATCH 04/22] RichnessEstimation.pm: iterate over arrays in ACE/ICE variance Marginally faster and uses a mite less memory. --- lib/Biodiverse/Indices/RichnessEstimation.pm | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 325bb8f64..cb464fb07 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -682,19 +682,23 @@ sub _get_ice_variance { : '_get_ice_differential'; # work through the differentials and covariances - my %diff; + my @diff; my $var = 0; my @sorted = sort {$a <=> $b} keys %$freq_counts; - foreach my $i (@sorted) { - $diff{$i} = $self->$diff_method (%args, f => $i); + # could use builtin::indexed here + foreach my $i (0..$#sorted) { + my $v1 = $sorted[$i]; + $diff[$i] = $self->$diff_method (%args, f => $v1); my $cov; - foreach my $j (@sorted) { - $cov = $self->_get_ace_ice_cov (%args, i => $i, j => $j); - last if $i == $j; - $var += 2 * $diff{$i} * $diff{$j} * $cov; + foreach my $j (0..$i-1) { + my $v2 = $sorted[$j]; + $cov = $self->_get_ace_ice_cov (%args, i => $v1, j => $v2); + $var += 2 * $diff[$i] * $diff[$j] * $cov; } - $var += ($diff{$i} ** 2) * $cov; + # now the $i with $i case + $cov = $self->_get_ace_ice_cov (%args, i => $v1, j => $v1); + $var += ($diff[$i] ** 2) * $cov; } $var ||= undef; From 147cd478424430534b2052b52f630a4b05ae74ab Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 18:35:01 +1100 Subject: [PATCH 05/22] RichnessEstimation.pm: reduce the _get_ace_ice_cov argument burden No need to pass all the args onwards when we only need four. --- lib/Biodiverse/Indices/RichnessEstimation.pm | 31 ++++++++++---------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index cb464fb07..c1d9b2f93 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -686,6 +686,7 @@ sub _get_ice_variance { my $var = 0; my @sorted = sort {$a <=> $b} keys %$freq_counts; + my %cov_args = %args{qw/freq_counts s_estimate/}; # could use builtin::indexed here foreach my $i (0..$#sorted) { my $v1 = $sorted[$i]; @@ -693,11 +694,11 @@ sub _get_ice_variance { my $cov; foreach my $j (0..$i-1) { my $v2 = $sorted[$j]; - $cov = $self->_get_ace_ice_cov (%args, i => $v1, j => $v2); + $cov = $self->_get_ace_ice_cov (%cov_args, i => $v1, j => $v2); $var += 2 * $diff[$i] * $diff[$j] * $cov; } # now the $i with $i case - $cov = $self->_get_ace_ice_cov (%args, i => $v1, j => $v1); + $cov = $self->_get_ace_ice_cov (%cov_args, i => $v1, j => $v1); $var += ($diff[$i] ** 2) * $cov; } @@ -708,9 +709,8 @@ sub _get_ice_variance { # common to ACE and ICE sub _get_ace_ice_cov { - my ($self, %args) = @_; - my ($i, $j, $s_ice) = @args{qw/i j s_estimate/}; - my $Q = $args{freq_counts}; + my (undef, %args) = @_; + my ($i, $j, $s_ice, $Q) = @args{qw/i j s_estimate freq_counts/}; return $i == $j ? $Q->{$i} * (1 - $Q->{$i} / $s_ice) @@ -855,31 +855,30 @@ sub _get_ace_differential { my $cv_rare_h = $args{cv}; my $n_rare = $args{n_rare}; - my $c_rare = $args{C_rare}; # get from gamma calcs + # my $c_rare = $args{C_rare}; # get from gamma calcs my $D_rare = $args{S_rare}; # richness of labels with sample counts < $k - my $F = $args{freq_counts}; - my $t = $args{t}; - - my @u = (1..$k); + \my %F = $args{freq_counts}; + # my $t = $args{t}; my $si; { no Faster::Maths; $n_rare //= sum - map $_ * $F->{$_}, + map $_ * $F{$_}, grep $_ <= $k, - keys %$F; + keys %F; - $si = sum map(($_ * ($_ - 1) * ($F->{$_} // 0)), @u); + # my @u = (1..$k); # no need to iterate over 1 + $si = sum map(($_ * ($_ - 1) * ($F{$_} // 0)), 2..$k); } - my $f1 = $F->{1}; + my $f1 = $F{1}; my $d; if ($cv_rare_h != 0) { if ($f == 1) { $d = (1 - $f1 / $n_rare + $D_rare * ($n_rare - $f1) / $n_rare**2) - / (1 - $f1/$n_rare)**2 + / (1 - $f1 / $n_rare)**2 + ( (1 - $f1/$n_rare)**2 * $n_rare * ($n_rare - 1) @@ -909,7 +908,7 @@ sub _get_ace_differential { + (1 - $f1 / $n_rare)**2 * $n_rare * $f ) ) - / (1 - $f1/$n_rare)**4 / ($n_rare)**2 / ($n_rare - 1)**2 + / (1 - $f1/ $n_rare)**4 / ($n_rare)**2 / ($n_rare - 1)**2 + ($f * $f1**2 / $n_rare**2) / (1 - $f1 / $n_rare)**2; } From 63036f1e1f378028fbc72d3a36c2ab4e2633ddf6 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 19:19:58 +1100 Subject: [PATCH 06/22] RichnessEstimation.pm: add array args variant of _get_ace_ice_cov --- lib/Biodiverse/Indices/RichnessEstimation.pm | 22 +++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index c1d9b2f93..dabb6a818 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -686,7 +686,6 @@ sub _get_ice_variance { my $var = 0; my @sorted = sort {$a <=> $b} keys %$freq_counts; - my %cov_args = %args{qw/freq_counts s_estimate/}; # could use builtin::indexed here foreach my $i (0..$#sorted) { my $v1 = $sorted[$i]; @@ -694,11 +693,16 @@ sub _get_ice_variance { my $cov; foreach my $j (0..$i-1) { my $v2 = $sorted[$j]; - $cov = $self->_get_ace_ice_cov (%cov_args, i => $v1, j => $v2); + $cov = $self->_get_ace_ice_cov_aa ( + $v1, $v2, $args{s_estimate}, $freq_counts + ); $var += 2 * $diff[$i] * $diff[$j] * $cov; } # now the $i with $i case - $cov = $self->_get_ace_ice_cov (%cov_args, i => $v1, j => $v1); + $cov = $self->_get_ace_ice_cov_aa ( + $v1, $v1, $args{s_estimate}, $freq_counts + ); + $var += ($diff[$i] ** 2) * $cov; } @@ -713,8 +717,16 @@ sub _get_ace_ice_cov { my ($i, $j, $s_ice, $Q) = @args{qw/i j s_estimate freq_counts/}; return $i == $j - ? $Q->{$i} * (1 - $Q->{$i} / $s_ice) - : -1 * $Q->{$i} * $Q->{$j} / $s_ice; + ? $Q->{$i} * (1 - $Q->{$i} / $s_ice) + : -1 * $Q->{$i} * $Q->{$j} / $s_ice; +} + +sub _get_ace_ice_cov_aa { + my (undef, $i, $j, $s_ice, $Q) = @_; + + return $i == $j + ? $Q->{$i} * (1 - $Q->{$i} / $s_ice) + : -1 * $Q->{$i} * $Q->{$j} / $s_ice; } From 289448a20d45a5f5b668cdbae4545e0ff5d88be1 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 19:52:01 +1100 Subject: [PATCH 07/22] RichnessEstimation.pm: use grep to avoid some calcs, and refalias a hash --- lib/Biodiverse/Indices/RichnessEstimation.pm | 24 ++++++++++++-------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index dabb6a818..3301a0d8d 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -745,11 +745,9 @@ sub _get_ice_differential { my $n_infreq = $args{n_rare}; my $C_infreq = $args{C_rare}; # get from gamma calcs my $D_infreq = $args{S_rare}; # richness of labels with sample counts < $k - my $Q = $args{f_rare}; + \my %Q = $args{f_rare}; my $t = $args{t}; - my @u = (1..$k); - my $si; { @@ -760,9 +758,13 @@ sub _get_ice_differential { grep $_ < $k, keys %$freq_counts; - $si = sum map(($_ * ($_ - 1) * ($Q->{$_} // 0)), @u); + $si = + sum + map { $_ * ($_ - 1) * $Q{$_} } + grep {$Q{$_}} + 2..$k; } - my ($Q1, $Q2) = @$Q{1,2}; + my ($Q1, $Q2) = @Q{1,2}; $Q1 //= 0; $Q2 //= 0; @@ -877,12 +879,16 @@ sub _get_ace_differential { no Faster::Maths; $n_rare //= sum - map $_ * $F{$_}, - grep $_ <= $k, + map {$_ * $F{$_}} + grep {$_ <= $k} keys %F; - # my @u = (1..$k); # no need to iterate over 1 - $si = sum map(($_ * ($_ - 1) * ($F{$_} // 0)), 2..$k); + # no need to iterate over 1 + $si = + sum + map {$_ * ($_ - 1) * $F{$_}} + grep {$F{$_}} + 2..$k; } my $f1 = $F{1}; my $d; From 4ec59a8581acf573457c46736fcbdb8c7fa297f4 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 20:02:11 +1100 Subject: [PATCH 08/22] Indices::calc_nonempty_elements_used: make direct use of element list args No need to pass them through calc_abc first. --- lib/Biodiverse/Indices/Indices.pm | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index 32328e900..c08152771 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -1518,7 +1518,6 @@ sub get_metadata_calc_nonempty_elements_used { name => 'Non-empty element counts', description => "Counts of non-empty elements in neighbour sets 1 and 2.\n", type => 'Lists and Counts', - pre_calc => 'calc_abc', uses_nbr_lists => 1, # how many sets of lists it must have indices => { EL_COUNT_NONEMPTY_SET1 => { @@ -1548,24 +1547,17 @@ sub calc_nonempty_elements_used { # should run a precalc_gobal to check if the # basedata has empty groups as then we can shortcut my $bd = $self->get_basedata_ref; - my $list = $args{element_list_all}; - my %nonempty; - foreach my $gp (@$list) { - my $ref = $bd->get_labels_in_group_as_hash (group => $gp); - next if !scalar keys %$ref; - $nonempty{$gp}++; - } - my $non_empty_all = scalar keys %nonempty; - my $non_empty_set1 = grep {exists $nonempty{$_}} keys %{$args{element_list1} // {}}; - my $non_empty_set2 = $args{element_list2} - ? grep {exists $nonempty{$_}} keys %{$args{element_list2}} + my $non_empty_set1 = grep {$bd->get_richness_aa($_)} @{$args{element_list1} // []}; + my $non_empty_set2 + = $args{element_list2} + ? grep {$bd->get_richness_aa($_)} @{$args{element_list2}} : undef; my %results = ( EL_COUNT_NONEMPTY_SET1 => $non_empty_set1, EL_COUNT_NONEMPTY_SET2 => $non_empty_set2, - EL_COUNT_NONEMPTY_ALL => $non_empty_all, + EL_COUNT_NONEMPTY_ALL => $non_empty_set1 + ($non_empty_set2 // 0), ); return wantarray ? %results : \%results; From d67b246ebfa8f99e1726372f6d9cc7803f26c894 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Wed, 14 Feb 2024 20:31:41 +1100 Subject: [PATCH 09/22] Add Faster::Maths to Makefile.PL and cpanfile --- Makefile.PL | 13 +++++++------ cpanfile | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Makefile.PL b/Makefile.PL index 64b5c88f4..de99842b4 100644 --- a/Makefile.PL +++ b/Makefile.PL @@ -6,13 +6,14 @@ use ExtUtils::MakeMaker; # these are from the cpanfile # need to look at https://metacpan.org/pod/ExtUtils::MakeMaker::CPANfile my %common_reqs = ( - "Class::Inspector" => 0, - "Clone" => "0.35", - "Cpanel::JSON::XS" => "3", + "Class::Inspector" => 0, + "Clone" => "0.35", + "Cpanel::JSON::XS" => "3", "Data::Structure::Util" => 0, - "Data::Compare" => 0, - "Exception::Class" => 0, - "Exporter::Easy" => 0, + "Data::Compare" => 0, + "Exception::Class" => 0, + "Exporter::Easy" => 0, + "Faster::Maths" => 0, "File::BOM" => 0, "File::Find::Rule" => 0, "Geo::Converter::dms2dd" => "0.05", diff --git a/cpanfile b/cpanfile index 1376f214f..292443153 100644 --- a/cpanfile +++ b/cpanfile @@ -8,6 +8,7 @@ requires "Excel::ValueReader::XLSX"; requires "Exception::Class"; requires "Exporter::Easy"; #requires "FFI::Platypus::Declare"; +requires "Faster::Maths"; requires "File::BOM"; requires "File::Find::Rule"; requires "Geo::Converter::dms2dd", "0.05"; @@ -49,7 +50,7 @@ requires "rlib"; #requires "Math::AnyNum"; # until we don't requires "Statistics::Descriptive::PDL", "0.15"; -suggests "Panda::Lib"; +# suggests "Panda::Lib"; suggests "Data::Recursive"; #test_requires => sub { From 499eccc5b709c2f1bfce3696908822e4fb393395 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 19 Feb 2024 16:18:18 +1100 Subject: [PATCH 10/22] Indices: ace_differential: Refactor the coverage estimate This is re-used many times in the calculations --- lib/Biodiverse/Indices/RichnessEstimation.pm | 41 ++++++++++---------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 3301a0d8d..a9cae323c 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -893,52 +893,53 @@ sub _get_ace_differential { my $f1 = $F{1}; my $d; + my $cov_est = 1 - $f1 / $n_rare; # coverage estimate if ($cv_rare_h != 0) { if ($f == 1) { - $d = (1 - $f1 / $n_rare + $D_rare * ($n_rare - $f1) / $n_rare**2) - / (1 - $f1 / $n_rare)**2 + $d = ($cov_est + $D_rare * ($n_rare - $f1) / $n_rare**2) + / $cov_est**2 + ( - (1 - $f1/$n_rare)**2 * $n_rare * ($n_rare - 1) + $cov_est**2 * $n_rare * ($n_rare - 1) * ($D_rare * $si + $f1 * $si) - $f1 * $D_rare * $si - * (-2 * (1 - $f1 / $n_rare) * ($n_rare - $f1) / $n_rare**2 + * (-2 * $cov_est * ($n_rare - $f1) / $n_rare**2 * $n_rare * ($n_rare - 1) - + (1 - $f1/$n_rare)**2*(2*$n_rare - 1) + + $cov_est**2 * (2*$n_rare - 1) ) ) - / (1 - $f1/$n_rare)**4 / $n_rare**2 / ($n_rare - 1)**2 - - (1 - $f1 / $n_rare + $f1 * ($n_rare - $f1) + / $cov_est**4 / $n_rare**2 / ($n_rare - 1)**2 + - ($cov_est + $f1 * ($n_rare - $f1) / $n_rare**2) - / (1 - $f1 / $n_rare)**2; + / $cov_est**2; } else { - $d = (1 - $f1 / $n_rare - $D_rare * $f * $f1 / $n_rare**2) - / (1 - $f1 / $n_rare)**2 + $d = ($cov_est - $D_rare * $f * $f1 / $n_rare**2) + / $cov_est**2 + ( - (1 - $f1 / $n_rare)**2 + $cov_est**2 * $n_rare * ($n_rare - 1) * $f1 * ($si + $D_rare * $f * ($f - 1)) - $f1 * $D_rare * $si * - (2 * (1 - $f1 / $n_rare) * $f1 * $f / $n_rare**2 + (2 * $cov_est * $f1 * $f / $n_rare**2 * $n_rare * ($n_rare - 1) - + (1 - $f1 / $n_rare)**2 * $f * ($n_rare - 1) - + (1 - $f1 / $n_rare)**2 * $n_rare * $f + + $cov_est**2 * $f * ($n_rare - 1) + + $cov_est**2 * $n_rare * $f ) ) - / (1 - $f1/ $n_rare)**4 / ($n_rare)**2 / ($n_rare - 1)**2 + / $cov_est**4 / $n_rare**2 / ($n_rare - 1)**2 + ($f * $f1**2 / $n_rare**2) - / (1 - $f1 / $n_rare)**2; + / $cov_est**2; } } else { if ($f == 1) { - $d = (1 - $f1 / $n_rare + $D_rare * ($n_rare - $f1) / $n_rare**2) - / (1 - $f1 / $n_rare)**2; + $d = ($cov_est + $D_rare * ($n_rare - $f1) / $n_rare**2) + / $cov_est**2; } else { - $d = (1 - $f1 / $n_rare - $D_rare * $f * $f1 / $n_rare**2) - / (1 - $f1 / $n_rare)**2; + $d = ($cov_est - $D_rare * $f * $f1 / $n_rare**2) + / $cov_est**2; } } From 0cdef071f0556715e60b42dc3bddb16142f43d5a Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 19 Feb 2024 16:23:44 +1100 Subject: [PATCH 11/22] Indices: ace_differential: Refactor ($n_rare - 1) This is re-used many times in the calculations --- lib/Biodiverse/Indices/RichnessEstimation.pm | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index a9cae323c..09e67beae 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -895,20 +895,21 @@ sub _get_ace_differential { my $cov_est = 1 - $f1 / $n_rare; # coverage estimate if ($cv_rare_h != 0) { + my $n_rare_1 = $n_rare - 1; if ($f == 1) { $d = ($cov_est + $D_rare * ($n_rare - $f1) / $n_rare**2) / $cov_est**2 + ( - $cov_est**2 * $n_rare * ($n_rare - 1) + $cov_est**2 * $n_rare * $n_rare_1 * ($D_rare * $si + $f1 * $si) - $f1 * $D_rare * $si * (-2 * $cov_est * ($n_rare - $f1) / $n_rare**2 - * $n_rare * ($n_rare - 1) + * $n_rare * $n_rare_1 + $cov_est**2 * (2*$n_rare - 1) ) ) - / $cov_est**4 / $n_rare**2 / ($n_rare - 1)**2 + / $cov_est**4 / $n_rare**2 / $n_rare_1**2 - ($cov_est + $f1 * ($n_rare - $f1) / $n_rare**2) / $cov_est**2; @@ -918,16 +919,16 @@ sub _get_ace_differential { / $cov_est**2 + ( $cov_est**2 - * $n_rare * ($n_rare - 1) * $f1 * + * $n_rare * $n_rare_1 * $f1 * ($si + $D_rare * $f * ($f - 1)) - $f1 * $D_rare * $si * (2 * $cov_est * $f1 * $f / $n_rare**2 - * $n_rare * ($n_rare - 1) - + $cov_est**2 * $f * ($n_rare - 1) + * $n_rare * $n_rare_1 + + $cov_est**2 * $f * $n_rare_1 + $cov_est**2 * $n_rare * $f ) ) - / $cov_est**4 / $n_rare**2 / ($n_rare - 1)**2 + / $cov_est**4 / $n_rare**2 / $n_rare_1**2 + ($f * $f1**2 / $n_rare**2) / $cov_est**2; } From 8e77c1629d4ece2aa6ec5c2ea1164c457961f702 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 19 Feb 2024 16:27:56 +1100 Subject: [PATCH 12/22] Indices: ice_differential: Refactor ($t - 1) This is re-used many times in the calculations --- lib/Biodiverse/Indices/RichnessEstimation.pm | 40 ++++++++++---------- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 09e67beae..b7b365ee8 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -770,17 +770,19 @@ sub _get_ice_differential { my ($d, $dc_infreq); + my $t_1 = $t - 1; # used many times below + if ($CV_infreq_h != 0) { if ($q == 1) { $dc_infreq = -1 * ( - $n_infreq * (($t - 1) * $Q1 + 2 * $Q2) * 2 * $Q1 * ($t - 1) - - ($t - 1) * $Q1**2 * (($t - 1) * ($Q1 + $n_infreq) + 2 * $Q2) + $n_infreq * ($t_1 * $Q1 + 2 * $Q2) * 2 * $Q1 * $t_1 + - $t_1 * $Q1**2 * ($t_1 * ($Q1 + $n_infreq) + 2 * $Q2) ) - / ($n_infreq * (($t - 1) * $Q1 + 2 * $Q2)) ** 2; + / ($n_infreq * ($t_1 * $Q1 + 2 * $Q2)) ** 2; $d = ($C_infreq - $D_infreq * $dc_infreq) / $C_infreq ** 2 - + $t / ($t - 1) + + $t / $t_1 * ($C_infreq**2*$n_infreq*($n_infreq - 1) * ($D_infreq * $si + $Q1 * $si) - $Q1 * $D_infreq * $si * @@ -796,15 +798,15 @@ sub _get_ice_differential { } elsif ($q == 2){ $dc_infreq - = -( -($t - 1) * $Q1**2 * - (2 * ($t - 1) * $Q1 + 2 * ($n_infreq + 2 * $Q2)) + = -( -$t_1 * $Q1**2 * + (2 * $t_1 * $Q1 + 2 * ($n_infreq + 2 * $Q2)) ) / - ($n_infreq * (($t - 1) * $Q1 + 2 * $Q2))**2; + ($n_infreq * ($t_1 * $Q1 + 2 * $Q2))**2; $d = ($C_infreq - $D_infreq * $dc_infreq) / $C_infreq**2 - + $t / ($t - 1) + + $t / $t_1 * ($C_infreq**2 * $n_infreq * ($n_infreq - 1) * $Q1 * ($si + 2 * $D_infreq) - $Q1 * $D_infreq * $si * (2 * $C_infreq * $dc_infreq * $n_infreq * ($n_infreq - 1) + $C_infreq**2 * 2 * ($n_infreq - 1) + $C_infreq**2 * $n_infreq * 2) ) @@ -813,11 +815,11 @@ sub _get_ice_differential { } else { $dc_infreq = - - ( - ($t - 1) * $Q1**2 * (($t - 1) * $Q1 * $q + 2 * $Q2 * $q)) - / ($n_infreq * (($t - 1) * $Q1 + 2 * $Q2))**2; + - ( - $t_1 * $Q1**2 * ($t_1 * $Q1 * $q + 2 * $Q2 * $q)) + / ($n_infreq * ($t_1 * $Q1 + 2 * $Q2))**2; $d = ($C_infreq - $D_infreq * $dc_infreq) / $C_infreq**2 - + $t/($t - 1) + + $t/$t_1 * ($C_infreq**2 * $n_infreq * ($n_infreq - 1) * $Q1 * ($si + $q * ($q - 1) * $D_infreq) - $Q1 * $D_infreq * $si * (2 * $C_infreq * $dc_infreq * $n_infreq * ($n_infreq - 1) + $C_infreq**2 * $q * ($n_infreq - 1) @@ -832,24 +834,24 @@ sub _get_ice_differential { if ($q == 1) { $dc_infreq = -1 * - ($n_infreq * (($t - 1) * $Q1 + 2 * $Q2) * 2 * $Q1 * ($t - 1) - - ($t - 1) * $Q1**2 * (($t - 1) * ($Q1 + $n_infreq) + 2 * $Q2) + ($n_infreq * ($t_1 * $Q1 + 2 * $Q2) * 2 * $Q1 * $t_1 + - $t_1 * $Q1**2 * ($t_1 * ($Q1 + $n_infreq) + 2 * $Q2) ) - / ($n_infreq * (($t - 1) * $Q1 + 2 * $Q2))**2; + / ($n_infreq * ($t_1 * $Q1 + 2 * $Q2))**2; } elsif ($q == 2) { $dc_infreq = -1 * - ( -1 * ($t - 1) * $Q1**2 * - (2 * ($t - 1) * $Q1 + 2 * ($n_infreq + 2 * $Q2)) + ( -1 * $t_1 * $Q1**2 * + (2 * $t_1 * $Q1 + 2 * ($n_infreq + 2 * $Q2)) ) - / ($n_infreq * (($t - 1) * $Q1 + 2 * $Q2))**2; + / ($n_infreq * ($t_1 * $Q1 + 2 * $Q2))**2; } else { $dc_infreq = -1 * - ( -1 * ($t - 1) * $Q1**2 * (($t - 1) * $Q1 * $q + 2 * $Q2 * $q)) - / ($n_infreq * (($t - 1) * $Q1 + 2 * $Q2))**2; + ( -1 * $t_1 * $Q1**2 * ($t_1 * $Q1 * $q + 2 * $Q2 * $q)) + / ($n_infreq * ($t_1 * $Q1 + 2 * $Q2))**2; } $d = ($C_infreq - $D_infreq * $dc_infreq) / $C_infreq**2; } From 0f84deb78677766b80bc6fcdb5f6bda2787e7530 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Mon, 19 Feb 2024 16:30:41 +1100 Subject: [PATCH 13/22] Indices: ice_differential: Refactor ($n_infreq - 1) This is re-used many times in the calculations --- lib/Biodiverse/Indices/RichnessEstimation.pm | 23 ++++++++++---------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index b7b365ee8..e06728346 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -773,6 +773,7 @@ sub _get_ice_differential { my $t_1 = $t - 1; # used many times below if ($CV_infreq_h != 0) { + my $n_infreq_1 = $n_infreq - 1; if ($q == 1) { $dc_infreq = -1 * ( @@ -783,17 +784,17 @@ sub _get_ice_differential { $d = ($C_infreq - $D_infreq * $dc_infreq) / $C_infreq ** 2 + $t / $t_1 - * ($C_infreq**2*$n_infreq*($n_infreq - 1) + * ($C_infreq**2*$n_infreq*$n_infreq_1 * ($D_infreq * $si + $Q1 * $si) - $Q1 * $D_infreq * $si * (2 * $C_infreq * $dc_infreq - * $n_infreq * ($n_infreq - 1) + * $n_infreq * $n_infreq_1 + $C_infreq ** 2 - * ($n_infreq - 1) + * $n_infreq_1 + $C_infreq ** 2 * $n_infreq ) - ) / $C_infreq ** 4 / $n_infreq ** 2 / ($n_infreq - 1) ** 2 + ) / $C_infreq ** 4 / $n_infreq ** 2 / $n_infreq_1 ** 2 - ($C_infreq - $Q1 * $dc_infreq) / $C_infreq**2; } elsif ($q == 2){ @@ -807,10 +808,10 @@ sub _get_ice_differential { $d = ($C_infreq - $D_infreq * $dc_infreq) / $C_infreq**2 + $t / $t_1 - * ($C_infreq**2 * $n_infreq * ($n_infreq - 1) * $Q1 * ($si + 2 * $D_infreq) - $Q1 * $D_infreq * $si * - (2 * $C_infreq * $dc_infreq * $n_infreq * ($n_infreq - 1) + $C_infreq**2 * 2 * ($n_infreq - 1) + $C_infreq**2 * $n_infreq * 2) + * ($C_infreq**2 * $n_infreq * $n_infreq_1 * $Q1 * ($si + 2 * $D_infreq) - $Q1 * $D_infreq * $si * + (2 * $C_infreq * $dc_infreq * $n_infreq * $n_infreq_1 + $C_infreq**2 * 2 * $n_infreq_1 + $C_infreq**2 * $n_infreq * 2) ) - / $C_infreq**4 / $n_infreq**2 / ($n_infreq - 1)**2 + / $C_infreq**4 / $n_infreq**2 / $n_infreq_1**2 - ( -$Q1 * $dc_infreq) / $C_infreq**2; } else { @@ -820,13 +821,13 @@ sub _get_ice_differential { $d = ($C_infreq - $D_infreq * $dc_infreq) / $C_infreq**2 + $t/$t_1 - * ($C_infreq**2 * $n_infreq * ($n_infreq - 1) * $Q1 * ($si + $q * ($q - 1) * $D_infreq) - $Q1 * $D_infreq * $si - * (2 * $C_infreq * $dc_infreq * $n_infreq * ($n_infreq - 1) - + $C_infreq**2 * $q * ($n_infreq - 1) + * ($C_infreq**2 * $n_infreq * $n_infreq_1 * $Q1 * ($si + $q * ($q - 1) * $D_infreq) - $Q1 * $D_infreq * $si + * (2 * $C_infreq * $dc_infreq * $n_infreq * $n_infreq_1 + + $C_infreq**2 * $q * $n_infreq_1 + $C_infreq**2 * $n_infreq * $q ) ) - / $C_infreq**4 / $n_infreq**2 / ($n_infreq - 1)**2 + / $C_infreq**4 / $n_infreq**2 / $n_infreq_1**2 - ( - $Q1 * $dc_infreq) / $C_infreq**2; } } From e964fdd52c0a10dd6052bc8c8df3409e0edc7cd4 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 13:36:11 +1100 Subject: [PATCH 14/22] GUI legend: handle non-sequential categorical classes --- lib/Biodiverse/GUI/Legend.pm | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/lib/Biodiverse/GUI/Legend.pm b/lib/Biodiverse/GUI/Legend.pm index 236cf49ae..05a9b165c 100644 --- a/lib/Biodiverse/GUI/Legend.pm +++ b/lib/Biodiverse/GUI/Legend.pm @@ -213,10 +213,12 @@ sub make_rect { my $label_hash = $self->{categorical}{labels}; my $n = (scalar keys %$label_hash) - 1; + my @classes = sort {$a <=> $b} keys %$label_hash; + $n = $#classes; foreach my $row (0..($height - 1)) { # cat 0 at the top - my $class = $n - int (0.5 + $n * $row / ($height - 1)); - my $colour = $self->get_colour_categorical ($class); + my $class_iter = $n - int (0.5 + $n * $row / ($height - 1)); + my $colour = $self->get_colour_categorical ($classes[$class_iter]); $self->add_row($self->{legend_colours_group}, $row, $colour); } } From b9ec58984d276a9ecf4b5af5f493c3d2ba069196 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 13:37:49 +1100 Subject: [PATCH 15/22] RichnessEstimation.pm: metadata indices are categorical Add class details and default colours to the metadata. --- lib/Biodiverse/Indices/RichnessEstimation.pm | 70 ++++++++++++++++++-- 1 file changed, 66 insertions(+), 4 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index e06728346..7b5a6645f 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -14,6 +14,7 @@ use experimental qw /refaliasing/; # where it is lexically disabled. # see https://rt.cpan.org/Public/Dist/Display.html?Name=Faster-Maths use Faster::Maths; +#no if ($Faster::Maths::VERSION le '0.02') => 'Faster::Maths'; our $VERSION = '4.99_002'; @@ -23,7 +24,49 @@ use Readonly; Readonly my $z_for_ci => 1.959964; # currently hard coded for 0.95 +# should be in a Common.pm subclass +sub get_palette_colorbrewer13 { + # Paired colour scheme from colorbrewer, plus a dark grey + # note - this works poorly when 9 or fewer groups are selected + no warnings 'qw'; # we know the hashes are not comments + return qw '#A6CEE3 #1F78B4 #B2DF8A #33A02C + #FB9A99 #E31A1C #FDBF6F #FF7F00 + #CAB2D6 #6A3D9A #FFFF99 #B15928 + #4B4B4B'; +} + +sub get_palette_colorbrewer9_set1 { + # Set1 colour scheme from www.colorbrewer2.org + no warnings 'qw'; # we know the hashes are not comments + return qw '#E41A1C #377EB8 #4DAF4A #984EA3 + #FF7F00 #FFFF33 #A65628 #F781BF + #999999'; +} + +sub get_palette_colorbrewer9_paired { + # 9 class paired colour scheme from www.colorbrewer2.org + return ('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6'); +} + +sub get_palette_colorbrewer9_set3 { + # 9 class paired colour scheme from www.colorbrewer2.org + return ('#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9'); +} + sub get_metadata_calc_chao1 { + my $self = shift; + + my @cb_palette = $self->get_palette_colorbrewer9_paired; + my (%colours, %labels); + my @codes = (0, 2, 6, 7, 8, 13, 14); + foreach my $i (0..$#codes) { + my $code = $codes[$i]; + $colours{$code} = $cb_palette[$i]; + $labels{$code} = "eqn $code"; + say "colours $code $colours{$code}"; + say "label $code $labels{$code}"; + } + my %metadata = ( description => 'Chao1 species richness estimator (abundance based)', name => 'Chao1', @@ -58,10 +101,13 @@ sub get_metadata_calc_chao1 { description => 'Upper confidence interval for the Chao1 estimate', }, CHAO1_META => { - description => 'Metadata indicating which formulae were used in the ' - . 'calculations. Numbers refer to EstimateS equations at ' - . 'http://viceroy.eeb.uconn.edu/EstimateS/EstimateSPages/EstSUsersGuide/EstimateSUsersGuide.htm', - type => 'list', + description => 'Metadata indicating which formulae were used in the ' + . 'calculations. Numbers refer to EstimateS equations at ' + . 'http://viceroy.eeb.uconn.edu/EstimateS/EstimateSPages/EstSUsersGuide/EstimateSUsersGuide.htm', + type => 'list', + distribution => 'categorical', + colours => \%colours, + labels => \%labels, }, }, ); @@ -183,6 +229,19 @@ sub calc_chao1 { sub get_metadata_calc_chao2 { + my $self = shift; + + my @cb_palette = $self->get_palette_colorbrewer9_paired; + my (%colours, %labels); + my @codes = (0, 4, 10, 11, 12, 13, 14); + foreach my $i (0..$#codes) { + my $code = $codes[$i]; + $colours{$code} = $cb_palette[$i]; + $labels{$code} = "eqn $code"; + say "colours $code $colours{$code}"; + say "label $code $labels{$code}"; + } + my %metadata = ( description => 'Chao2 species richness estimator (incidence based)', name => 'Chao2', @@ -221,6 +280,9 @@ sub get_metadata_calc_chao2 { . 'calculations. Numbers refer to EstimateS equations at ' . 'http://viceroy.eeb.uconn.edu/EstimateS/EstimateSPages/EstSUsersGuide/EstimateSUsersGuide.htm', type => 'list', + distribution => 'categorical', + colours => \%colours, + labels => \%labels, }, }, ); From 78f80d4991a1e967c59630875a1fedbf64723010 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 13:48:44 +1100 Subject: [PATCH 16/22] Categorical indices: update test expectations An empty array is expected for the bounds, rather than undef. --- t/23-Indices.t | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/23-Indices.t b/t/23-Indices.t index 2b5865c64..7ea0cacbe 100644 --- a/t/23-Indices.t +++ b/t/23-Indices.t @@ -247,8 +247,8 @@ sub test_index_bounds { my $metadata = $indices_object->get_metadata( sub => $index_source ); if ($metadata->get_index_is_categorical($index)) { # some of these structures still need to be finalised - is $bounds, undef, - "Bounds undefined for categorical index $index"; + is $bounds, [], + "No bounds for categorical index $index"; my $labels = $indices_object->get_index_category_labels (index => $index); is $labels, hash { all_vals D(); From 4faa2694e943c7329e445a2a46fa455337ad9904 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 13:49:34 +1100 Subject: [PATCH 17/22] Refactor colour palettes into a Common:: subpackage. --- lib/Biodiverse/Common/ColourPalettes.pm | 39 ++++++++++++++++++++ lib/Biodiverse/Indices/RichnessEstimation.pm | 33 +---------------- 2 files changed, 41 insertions(+), 31 deletions(-) create mode 100644 lib/Biodiverse/Common/ColourPalettes.pm diff --git a/lib/Biodiverse/Common/ColourPalettes.pm b/lib/Biodiverse/Common/ColourPalettes.pm new file mode 100644 index 000000000..e5ebf8494 --- /dev/null +++ b/lib/Biodiverse/Common/ColourPalettes.pm @@ -0,0 +1,39 @@ +package Biodiverse::Common::ColourPalettes; +use strict; +use warnings; + +# common colour palettes + +# should be in a Common.pm subclass +sub get_palette_colorbrewer13_paired { + # Paired colour scheme from colorbrewer, plus a dark grey + # note - this works poorly when 9 or fewer groups are selected + no warnings 'qw'; # we know the hashes are not comments + my @palette = qw '#A6CEE3 #1F78B4 #B2DF8A #33A02C + #FB9A99 #E31A1C #FDBF6F #FF7F00 + #CAB2D6 #6A3D9A #FFFF99 #B15928 + #4B4B4B'; + return wantarray ? @palette : [@palette]; +} + +sub get_palette_colorbrewer9_paired { + # 9 class paired colour scheme from www.colorbrewer2.org + my @palette = ('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6'); + return wantarray ? @palette : [@palette]; +} + +sub get_palette_colorbrewer9_set1 { + # Set1 colour scheme from www.colorbrewer2.org + my @palette = ('#E41A1C', '#377EB8', '#4DAF4A', '#984EA3', + '#FF7F00', '#FFFF33', '#A65628', '#F781BF', + '#999999'); + return wantarray ? @palette : [@palette]; +} + +sub get_palette_colorbrewer9_set3 { + # 9 class paired colour scheme from www.colorbrewer2.org + my @palette = ('#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9'); + return wantarray ? @palette : [@palette]; +} + +1; \ No newline at end of file diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 7b5a6645f..9e08be12e 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -9,6 +9,8 @@ use Carp; use List::Util qw /max min sum/; use experimental qw /refaliasing/; +use parent 'Biodiverse::Common::ColourPalettes'; + # segfaults wherever there is a while-each, map or postfix values loop, # and possibly other conditions. These are all localised inside blocks # where it is lexically disabled. @@ -24,34 +26,7 @@ use Readonly; Readonly my $z_for_ci => 1.959964; # currently hard coded for 0.95 -# should be in a Common.pm subclass -sub get_palette_colorbrewer13 { - # Paired colour scheme from colorbrewer, plus a dark grey - # note - this works poorly when 9 or fewer groups are selected - no warnings 'qw'; # we know the hashes are not comments - return qw '#A6CEE3 #1F78B4 #B2DF8A #33A02C - #FB9A99 #E31A1C #FDBF6F #FF7F00 - #CAB2D6 #6A3D9A #FFFF99 #B15928 - #4B4B4B'; -} - -sub get_palette_colorbrewer9_set1 { - # Set1 colour scheme from www.colorbrewer2.org - no warnings 'qw'; # we know the hashes are not comments - return qw '#E41A1C #377EB8 #4DAF4A #984EA3 - #FF7F00 #FFFF33 #A65628 #F781BF - #999999'; -} -sub get_palette_colorbrewer9_paired { - # 9 class paired colour scheme from www.colorbrewer2.org - return ('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6'); -} - -sub get_palette_colorbrewer9_set3 { - # 9 class paired colour scheme from www.colorbrewer2.org - return ('#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9'); -} sub get_metadata_calc_chao1 { my $self = shift; @@ -63,8 +38,6 @@ sub get_metadata_calc_chao1 { my $code = $codes[$i]; $colours{$code} = $cb_palette[$i]; $labels{$code} = "eqn $code"; - say "colours $code $colours{$code}"; - say "label $code $labels{$code}"; } my %metadata = ( @@ -238,8 +211,6 @@ sub get_metadata_calc_chao2 { my $code = $codes[$i]; $colours{$code} = $cb_palette[$i]; $labels{$code} = "eqn $code"; - say "colours $code $colours{$code}"; - say "label $code $labels{$code}"; } my %metadata = ( From 47c79f0cfffcd41b3f8ca495ebc1741d89d97c45 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 14:26:40 +1100 Subject: [PATCH 18/22] Colour palettes: generalise names, add more palettes There is no need for the number of classes in the sub name as they are all from the same sets. It is just that some sets have less colours in total. And add all the colorbrewer qualitative palettes while we are here. --- lib/Biodiverse/Common/ColourPalettes.pm | 86 +++++++++++++++++++------ 1 file changed, 68 insertions(+), 18 deletions(-) diff --git a/lib/Biodiverse/Common/ColourPalettes.pm b/lib/Biodiverse/Common/ColourPalettes.pm index e5ebf8494..58d33bb25 100644 --- a/lib/Biodiverse/Common/ColourPalettes.pm +++ b/lib/Biodiverse/Common/ColourPalettes.pm @@ -2,38 +2,88 @@ package Biodiverse::Common::ColourPalettes; use strict; use warnings; -# common colour palettes +# A set of colour palettes. +# Add to as needed. -# should be in a Common.pm subclass -sub get_palette_colorbrewer13_paired { +sub get_palette_colorbrewer_paired { # Paired colour scheme from colorbrewer, plus a dark grey # note - this works poorly when 9 or fewer groups are selected no warnings 'qw'; # we know the hashes are not comments - my @palette = qw '#A6CEE3 #1F78B4 #B2DF8A #33A02C - #FB9A99 #E31A1C #FDBF6F #FF7F00 - #CAB2D6 #6A3D9A #FFFF99 #B15928 - #4B4B4B'; + my @palette = ( + '#A6CEE3', '#1F78B4', '#B2DF8A', '#33A02C', + '#FB9A99', '#E31A1C', '#FDBF6F', '#FF7F00', + '#CAB2D6', '#6A3D9A', '#FFFF99', '#B15928', + '#4B4B4B', + ); return wantarray ? @palette : [@palette]; } -sub get_palette_colorbrewer9_paired { - # 9 class paired colour scheme from www.colorbrewer2.org - my @palette = ('#a6cee3','#1f78b4','#b2df8a','#33a02c','#fb9a99','#e31a1c','#fdbf6f','#ff7f00','#cab2d6'); +sub get_palette_colorbrewer_set1 { + # 9 class colour scheme from www.colorbrewer2.org + my @palette = ( + '#E41A1C', '#377EB8', '#4DAF4A', '#984EA3', + '#FF7F00', '#FFFF33', '#A65628', '#F781BF', + '#999999', + ); return wantarray ? @palette : [@palette]; } -sub get_palette_colorbrewer9_set1 { - # Set1 colour scheme from www.colorbrewer2.org - my @palette = ('#E41A1C', '#377EB8', '#4DAF4A', '#984EA3', - '#FF7F00', '#FFFF33', '#A65628', '#F781BF', - '#999999'); +sub get_palette_colorbrewer_set2 { + # 8 class colour scheme from www.colorbrewer2.org + my @palette = ( + '#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', + '#a6d854', '#ffd92f', '#e5c494', '#b3b3b3', + ); + return wantarray ? @palette : [@palette]; +} + +sub get_palette_colorbrewer_set3 { + # 12 class colour scheme from www.colorbrewer2.org + my @palette = ( + '#8dd3c7', '#ffffb3', '#bebada', '#fb8072', + '#80b1d3', '#fdb462', '#b3de69', '#fccde5', + '#d9d9d9', '#bc80bd', '#ccebc5', '#ffed6f', + ); + return wantarray ? @palette : [@palette]; +} + +sub get_palette_colorbrewer_pastel1 { + # 9 class colour scheme from www.colorbrewer2.org + my @palette = ( + '#fbb4ae', '#b3cde3', '#ccebc5', '#decbe4', + '#fed9a6', '#ffffcc', '#e5d8bd', '#fddaec', + '#f2f2f2' + ); return wantarray ? @palette : [@palette]; } -sub get_palette_colorbrewer9_set3 { - # 9 class paired colour scheme from www.colorbrewer2.org - my @palette = ('#8dd3c7','#ffffb3','#bebada','#fb8072','#80b1d3','#fdb462','#b3de69','#fccde5','#d9d9d9'); +sub get_palette_colorbrewer_pastel2 { + # 8 class colour scheme from www.colorbrewer2.org + my @palette = ( + '#b3e2cd', '#fdcdac', '#cbd5e8', '#f4cae4', + '#e6f5c9', '#fff2ae', '#f1e2cc', '#cccccc' + ); return wantarray ? @palette : [@palette]; } +sub get_palette_colorbrewer_accent { + # 8 class colour scheme from www.colorbrewer2.org + my @palette = ( + '#7fc97f', '#beaed4', '#fdc086', '#ffff99', + '#386cb0', '#f0027f', '#bf5b17', '#666666' + ); + return wantarray ? @palette : [@palette]; +} + +sub get_palette_colorbrewer_dark2 { + # 8 class colour scheme from www.colorbrewer2.org + my @palette = ( + '#1b9e77', '#d95f02', '#7570b3', '#e7298a', + '#66a61e', '#e6ab02', '#a6761d', '#666666', + ); + return wantarray ? @palette : [@palette]; +} + + + 1; \ No newline at end of file From fbef78709978536be43b24ec195d3ac56ae3e81d Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 14:28:25 +1100 Subject: [PATCH 19/22] RichnessEstimation.pm: use modified method names for palettes --- lib/Biodiverse/Indices/RichnessEstimation.pm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 9e08be12e..794a92216 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -31,7 +31,7 @@ Readonly my $z_for_ci => 1.959964; # currently hard coded for 0.95 sub get_metadata_calc_chao1 { my $self = shift; - my @cb_palette = $self->get_palette_colorbrewer9_paired; + my @cb_palette = $self->get_palette_colorbrewer_paired; my (%colours, %labels); my @codes = (0, 2, 6, 7, 8, 13, 14); foreach my $i (0..$#codes) { @@ -204,7 +204,7 @@ sub calc_chao1 { sub get_metadata_calc_chao2 { my $self = shift; - my @cb_palette = $self->get_palette_colorbrewer9_paired; + my @cb_palette = $self->get_palette_colorbrewer_paired; my (%colours, %labels); my @codes = (0, 4, 10, 11, 12, 13, 14); foreach my $i (0..$#codes) { From 102fd124ac76023e176d606f53637fdf6b49274e Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 15:06:08 +1100 Subject: [PATCH 20/22] Formatting --- lib/Biodiverse/Indices/Indices.pm | 13 ++++++------- lib/Biodiverse/Indices/RichnessEstimation.pm | 4 +--- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/lib/Biodiverse/Indices/Indices.pm b/lib/Biodiverse/Indices/Indices.pm index c08152771..9b6fbf0e3 100644 --- a/lib/Biodiverse/Indices/Indices.pm +++ b/lib/Biodiverse/Indices/Indices.pm @@ -87,14 +87,13 @@ sub calc_richness { # calculate the aggregate richness for a set of elements my $self = shift; my %args = @_; # rest of args into a hash - my %results = (RICHNESS_ALL => $args{ABC}, - RICHNESS_SET1 => $args{A} + $args{B}, - RICHNESS_SET2 => $args{A} + $args{C}, - ); + my %results = ( + RICHNESS_ALL => $args{ABC}, + RICHNESS_SET1 => $args{A} + $args{B}, + RICHNESS_SET2 => $args{A} + $args{C}, + ); - return wantarray - ? (%results) - : \%results; + return wantarray ? %results : \%results; } sub get_metadata_calc_redundancy { diff --git a/lib/Biodiverse/Indices/RichnessEstimation.pm b/lib/Biodiverse/Indices/RichnessEstimation.pm index 794a92216..3152e147c 100644 --- a/lib/Biodiverse/Indices/RichnessEstimation.pm +++ b/lib/Biodiverse/Indices/RichnessEstimation.pm @@ -22,9 +22,7 @@ our $VERSION = '4.99_002'; my $metadata_class = 'Biodiverse::Metadata::Indices'; -use Readonly; - -Readonly my $z_for_ci => 1.959964; # currently hard coded for 0.95 +my $z_for_ci = 1.959964; # currently hard coded for 0.95 From 527511ff80605c3cd7f7af63dd187bd9b19a9583 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 15:22:58 +1100 Subject: [PATCH 21/22] Remove non-needed module loads, mostly Readonly and Carp These are not used in these packages. --- lib/Biodiverse/GUI/GUIManager.pm | 1 - lib/Biodiverse/GUI/Manager/BaseDatas.pm | 1 - lib/Biodiverse/GUI/Tabs/CalculationsTree.pm | 1 - lib/Biodiverse/Metadata/BaseStruct.pm | 2 -- lib/Biodiverse/Metadata/Parameter.pm | 2 -- lib/Biodiverse/Metadata/Randomisation.pm | 2 -- lib/Biodiverse/Metadata/SpatialConditions.pm | 3 --- 7 files changed, 12 deletions(-) diff --git a/lib/Biodiverse/GUI/GUIManager.pm b/lib/Biodiverse/GUI/GUIManager.pm index 54ac3682e..872fa67ad 100644 --- a/lib/Biodiverse/GUI/GUIManager.pm +++ b/lib/Biodiverse/GUI/GUIManager.pm @@ -13,7 +13,6 @@ use Carp; use Scalar::Util qw /blessed/; use English ( -no_match_vars ); -use Readonly; use FindBin qw ( $Bin ); use Path::Tiny qw /path/; diff --git a/lib/Biodiverse/GUI/Manager/BaseDatas.pm b/lib/Biodiverse/GUI/Manager/BaseDatas.pm index 3e57b05ac..651c14fa5 100644 --- a/lib/Biodiverse/GUI/Manager/BaseDatas.pm +++ b/lib/Biodiverse/GUI/Manager/BaseDatas.pm @@ -10,7 +10,6 @@ use Carp; use Scalar::Util qw /blessed/; use English ( -no_match_vars ); -use Readonly; use FindBin qw ( $Bin ); use Text::Wrapper; diff --git a/lib/Biodiverse/GUI/Tabs/CalculationsTree.pm b/lib/Biodiverse/GUI/Tabs/CalculationsTree.pm index 902f2e37b..b9d66e5be 100644 --- a/lib/Biodiverse/GUI/Tabs/CalculationsTree.pm +++ b/lib/Biodiverse/GUI/Tabs/CalculationsTree.pm @@ -21,7 +21,6 @@ use Text::Wrapper; our $VERSION = '4.99_002'; -#use Readonly; my $i; use constant MODEL_NAME_COL => $i || 0; use constant MODEL_INDEX_COL => ++$i; diff --git a/lib/Biodiverse/Metadata/BaseStruct.pm b/lib/Biodiverse/Metadata/BaseStruct.pm index 11b0f5d35..2e9eef52d 100644 --- a/lib/Biodiverse/Metadata/BaseStruct.pm +++ b/lib/Biodiverse/Metadata/BaseStruct.pm @@ -2,8 +2,6 @@ package Biodiverse::Metadata::BaseStruct; use strict; use warnings; use 5.016; -use Carp; -use Readonly; our $VERSION = '4.99_002'; diff --git a/lib/Biodiverse/Metadata/Parameter.pm b/lib/Biodiverse/Metadata/Parameter.pm index b0ec35b27..d493322c7 100644 --- a/lib/Biodiverse/Metadata/Parameter.pm +++ b/lib/Biodiverse/Metadata/Parameter.pm @@ -6,8 +6,6 @@ use warnings; # with params and specified extensively in import and export metadata use 5.016; -use Carp; -use Readonly; use parent qw /Biodiverse::Metadata/; diff --git a/lib/Biodiverse/Metadata/Randomisation.pm b/lib/Biodiverse/Metadata/Randomisation.pm index bcc72306f..9caef43ff 100644 --- a/lib/Biodiverse/Metadata/Randomisation.pm +++ b/lib/Biodiverse/Metadata/Randomisation.pm @@ -2,8 +2,6 @@ package Biodiverse::Metadata::Randomisation; use strict; use warnings; use 5.016; -use Carp; -use Readonly; use parent qw /Biodiverse::Metadata/; diff --git a/lib/Biodiverse/Metadata/SpatialConditions.pm b/lib/Biodiverse/Metadata/SpatialConditions.pm index 9a9af151a..b2426c955 100644 --- a/lib/Biodiverse/Metadata/SpatialConditions.pm +++ b/lib/Biodiverse/Metadata/SpatialConditions.pm @@ -2,9 +2,6 @@ package Biodiverse::Metadata::SpatialConditions; use strict; use warnings; use 5.016; -use Carp; -use Readonly; -use Clone qw /clone/; our $VERSION = '4.99_002'; From 18232bd139f4117cc6c1a269dad471c8c5ed4370 Mon Sep 17 00:00:00 2001 From: shawnlaffan Date: Tue, 20 Feb 2024 15:35:28 +1100 Subject: [PATCH 22/22] Add version to ColourPalettes.pm --- lib/Biodiverse/Common/ColourPalettes.pm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/Biodiverse/Common/ColourPalettes.pm b/lib/Biodiverse/Common/ColourPalettes.pm index 58d33bb25..3397ba9ed 100644 --- a/lib/Biodiverse/Common/ColourPalettes.pm +++ b/lib/Biodiverse/Common/ColourPalettes.pm @@ -2,6 +2,8 @@ package Biodiverse::Common::ColourPalettes; use strict; use warnings; +our $VERSION = '4.99_002'; + # A set of colour palettes. # Add to as needed.