Refactor evaluation metrics to output Legolas table rows (#45)
* wip

* Bump patch version, fix tests

* add missing param

* bump minor version

* add codecov

* more codecov

* Bump julia version to 1.6, testing to 1.6+1.7

* wip rt tests

* wip

* Support matrix serialization/deserialization

* Support matrix serialization/deserialization

* Replace missing with NaN

* Fix test dep

* test for inclusion of all metrics

* foiled by my own test case

* cleanup

* Add new docstrings to docs

* fix docs

* fix docstring

* export EvaluationRow

* remove unneeded dep

Co-authored-by: hannahilea <hannahilea@users.noreply.github.com>
ericphanson and hannahilea authored Mar 18, 2022
1 parent fa2b427 commit 9e9de96
Showing 11 changed files with 422 additions and 171 deletions.
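The upshot of the change: evaluation metrics are now carried in `EvaluationRow`, a `Legolas.@row`-defined type with schema `lighthouse.evaluation@1` (added in `src/row.jl` below), rather than in a plain `Dict{String,Any}`. A minimal sketch of the new shape, assuming only the keyword construction that `Legolas.@row` types provide and the fields listed in the schema (the values are placeholders):

    using Lighthouse  # exports `EvaluationRow` as of this commit

    row = EvaluationRow(; multiclass_kappa=0.8, class_labels=["a", "b"])
    row.multiclass_kappa   # 0.8
    row.confusion_matrix   # `missing` (assumption: unset schema fields default to `missing`)
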
2 changes: 1 addition & 1 deletion .github/workflows/CI.yml
@@ -14,7 +14,7 @@ jobs:
fail-fast: false
matrix:
version:
- '1.5'
- '1.7'
- '1.6'
os:
- ubuntu-latest
10 changes: 7 additions & 3 deletions Project.toml
@@ -1,10 +1,11 @@
name = "Lighthouse"
uuid = "ac2c24cd-07f0-4848-96b2-1b82c3ea0e59"
authors = ["Beacon Biosignals, Inc."]
version = "0.13.4"
version = "0.14.0"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
Legolas = "741b9549-f6ed-4911-9fbf-4a1c0c97f0cd"
LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
Makie = "ee78f7c6-11fb-53f2-987a-cfe4a2b5a57a"
@@ -16,15 +17,18 @@ TensorBoardLogger = "899adc3e-224a-11e9-021f-63837185c80f"

[compat]
CairoMakie = "0.7"
Legolas = "0.3"
Makie = "0.16.5"
StatsBase = "0.33"
Tables = "1.7"
TensorBoardLogger = "0.1"
julia = "1.5"
julia = "1.6"

[extras]
CairoMakie = "13f3f980-e62b-5c42-98c6-ff1f3baf88f0"
StableRNGs = "860ef19b-820b-49d6-a774-d7a799459cd3"
Tables = "bd369af6-aec1-5ad0-b16a-f7cc5008161c"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"

[targets]
test = ["Test", "CairoMakie", "StableRNGs"]
test = ["Test", "CairoMakie", "StableRNGs", "Tables"]
3 changes: 3 additions & 0 deletions docs/src/index.md
@@ -38,7 +38,10 @@ accuracy
binary_statistics
cohens_kappa
calibration_curve
EvaluationRow
Lighthouse.evaluation_metrics
Lighthouse._evaluation_row_dict
Lighthouse.evaluation_metrics_row
```

## Utilities
4 changes: 4 additions & 0 deletions src/Lighthouse.jl
@@ -6,6 +6,7 @@ using StatsBase: StatsBase
using TensorBoardLogger
using Makie
using Printf
using Legolas

include("plotting.jl")

@@ -18,6 +19,9 @@ export confusion_matrix, accuracy, binary_statistics, cohens_kappa, calibration_
include("classifier.jl")
export AbstractClassifier

include("row.jl")
export EvaluationRow

include("learn.jl")
export LearnLogger, learn!, upon, evaluate!, predict!

202 changes: 110 additions & 92 deletions src/learn.jl

Large diffs are not rendered by default.

28 changes: 13 additions & 15 deletions src/metrics.jl
@@ -24,14 +24,14 @@ end
accuracy(confusion::AbstractMatrix)
Returns the percentage of matching classifications out of total classifications,
or `missing` if `all(iszero, confusion)`.
or `NaN` if `all(iszero, confusion)`.
Note that `accuracy(confusion)` is equivalent to overall percent agreement
between `confusion`'s row classifier and column classifier.
"""
function accuracy(confusion::AbstractMatrix)
total = sum(confusion)
total == 0 && return missing
total == 0 && return NaN
return tr(confusion) / total
end
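As a quick hand-computed illustration of the definition above (both calls assume `accuracy` is invoked as exported by Lighthouse):

    accuracy([3 1; 2 4])        # tr / total = (3 + 4) / 10 = 0.7
    accuracy(zeros(Int, 2, 2))  # NaN under the new behaviour (previously `missing`)
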

@@ -78,15 +78,12 @@ function binary_statistics(confusion::AbstractMatrix, class_index::Integer)
false_negative_rate = (false_negatives == 0 && actual_positives == 0) ?
(zero(false_negatives) / one(actual_positives)) :
(false_negatives / actual_positives)
precision = (true_positives == 0 && predicted_positives == 0) ? missing :
precision = (true_positives == 0 && predicted_positives == 0) ? NaN :
(true_positives / predicted_positives)
return (predicted_positives=predicted_positives,
predicted_negatives=predicted_negatives, actual_positives=actual_positives,
actual_negatives=actual_negatives, true_positives=true_positives,
true_negatives=true_negatives, false_positives=false_positives,
false_negatives=false_negatives, true_positive_rate=true_positive_rate,
true_negative_rate=true_negative_rate, false_positive_rate=false_positive_rate,
false_negative_rate=false_negative_rate, precision=precision)
return (; predicted_positives, predicted_negatives, actual_positives, actual_negatives,
true_positives, true_negatives, false_positives, false_negatives,
true_positive_rate, true_negative_rate, false_positive_rate,
false_negative_rate, precision)
end
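The `NaN` fallback introduced above can be exercised directly; a small sketch (assuming nothing earlier in the method errors on an all-zero confusion matrix):

    stats = binary_statistics(zeros(Int, 2, 2), 1)
    isnan(stats.precision)  # true: no true positives and no predicted positives
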

function binary_statistics(confusion::AbstractMatrix)
@@ -105,7 +102,8 @@ Return `(κ, p₀)` where `κ` is Cohen's kappa and `p₀` percent agreement giv
their equivalents in [`confusion_matrix`](@ref)).
"""
function cohens_kappa(class_count, hard_label_pairs)
all(issubset(pair, 1:class_count) for pair in hard_label_pairs) || throw(ArgumentError("Unexpected class in `hard_label_pairs`."))
all(issubset(pair, 1:class_count) for pair in hard_label_pairs) ||
throw(ArgumentError("Unexpected class in `hard_label_pairs`."))
p₀ = accuracy(confusion_matrix(class_count, hard_label_pairs))
pₑ = _probability_of_chance_agreement(class_count, hard_label_pairs)
return _cohens_kappa(p₀, pₑ), p₀
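For reference, `_cohens_kappa(p₀, pₑ)` presumably implements the textbook definition κ = (p₀ − pₑ) / (1 − pₑ), with p₀ the observed agreement computed via `accuracy` above and pₑ the chance agreement from `_probability_of_chance_agreement`.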
@@ -137,7 +135,7 @@ where:
- `bins`: a vector with `bin_count` `Pairs` specifying the calibration curve's probability bins
- `fractions`: a vector where `fractions[i]` is the number of values in `probabilities`
that falls within `bin[i]` over the total number of values within `bin[i]`, or `missing`
that falls within `bin[i]` over the total number of values within `bin[i]`, or `NaN`
if the total number of values in `bin[i]` is zero.
- `totals`: a vector where `totals[i]` is the total number of values within `bin[i]`.
- `mean_squared_error`: The mean squared error of `fractions` vs. an ideal calibration curve.
@@ -150,12 +148,12 @@ function calibration_curve(probabilities, bitmask; bin_count=10)
bins = probability_bins(bin_count)
per_bin = [fraction_within(probabilities, bitmask, bin...) for bin in bins]
fractions, totals = first.(per_bin), last.(per_bin)
nonempty_indices = findall(!ismissing, fractions)
nonempty_indices = findall(!isnan, fractions)
if !isempty(nonempty_indices)
ideal = range(mean(first(bins)), mean(last(bins)); length=length(bins))
mean_squared_error = mse(fractions[nonempty_indices], ideal[nonempty_indices])
else
mean_squared_error = missing
mean_squared_error = NaN
end
return (bins=bins, fractions=fractions, totals=totals,
mean_squared_error=mean_squared_error)
@@ -179,6 +177,6 @@ function fraction_within(values, bitmask, start, stop)
total += 1
end
end
fraction = iszero(total) ? missing : (count / total)
fraction = iszero(total) ? NaN : (count / total)
return (fraction=fraction, total=total)
end
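One edge case of the `missing`→`NaN` change, traced through `calibration_curve` and `fraction_within` as shown above (empty inputs leave every bin empty):

    curve = calibration_curve(Float64[], Bool[]; bin_count=5)
    all(isnan, curve.fractions)      # every bin is empty, so each fraction is NaN
    isnan(curve.mean_squared_error)  # and no bin is nonempty, so the MSE is NaN too
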
133 changes: 133 additions & 0 deletions src/row.jl
@@ -0,0 +1,133 @@
# Arrow can't handle matrices---so when we write/read matrices, we have to pack and unpack them o_O
# https://github.com/apache/arrow-julia/issues/125
vec_to_mat(mat::AbstractMatrix) = mat

function vec_to_mat(vec::AbstractVector)
n = isqrt(length(vec))
return reshape(vec, n, n)
end

vec_to_mat(x::Missing) = return missing
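An illustrative round trip for the workaround above (square matrices only; it assumes the matrix was flattened column-major with `vec` before serialization):

    m = [1 2; 3 4]
    v = vec(m)           # [1, 3, 2, 4] — the flat form that actually gets stored
    vec_to_mat(v) == m   # true: `reshape(v, 2, 2)` restores the original matrix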

# Redefinition is workaround for https://github.com/beacon-biosignals/Legolas.jl/issues/9
const EVALUATION_ROW_SCHEMA = Legolas.Schema("lighthouse.evaluation@1")

"""
const EvaluationRow = Legolas.@row("lighthouse.evaluation@1",
class_labels::Union{Missing,Vector{String}},
confusion_matrix::Union{Missing,Array{Int64}} = vec_to_mat(confusion_matrix),
discrimination_calibration_curve::Union{Missing,
Tuple{Vector{Float64},
Vector{Float64}}},
discrimination_calibration_score::Union{Missing,Float64},
multiclass_IRA_kappas::Union{Missing,Float64},
multiclass_kappa::Union{Missing,Float64},
optimal_threshold::Union{Missing,Float64},
optimal_threshold_class::Union{Missing,Int64},
per_class_IRA_kappas::Union{Missing,Vector{Float64}},
per_class_kappas::Union{Missing,Vector{Float64}},
stratified_kappas::Union{Missing,
Vector{NamedTuple{(:per_class,
:multiclass,
:n),
Tuple{Vector{Float64},
Float64,
Int64}}}},
per_class_pr_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_class_reliability_calibration_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_class_reliability_calibration_scores::Union{Missing,
Vector{Float64}},
per_class_roc_aucs::Union{Missing,Vector{Float64}},
per_class_roc_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_expert_discrimination_calibration_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_expert_discrimination_calibration_scores::Union{Missing,
Vector{Float64}},
spearman_correlation::Union{Missing,
NamedTuple{(:ρ, :n,
:ci_lower,
:ci_upper),
Tuple{Float64,
Int64,
Float64,
Float64}}},
thresholds::Union{Missing,Vector{Float64}})
EvaluationRow(evaluation_row_dict::Dict{String, Any}) -> EvaluationRow
A type alias for [`Legolas.Row{typeof(Legolas.Schema("lighthouse.evaluation@1"))}`](https://beacon-biosignals.github.io/Legolas.jl/stable/#Legolas.@row)
representing the output metrics computed by [`evaluation_metrics_row`](@ref) and
[`evaluation_metrics`](@ref).
The constructor that takes `evaluation_row_dict` converts a `Dict` of metrics results from
[`evaluation_metrics`](@ref) (e.g. from Lighthouse <v0.14.0) into an [`EvaluationRow`](@ref).
"""
const EvaluationRow = Legolas.@row("lighthouse.evaluation@1",
class_labels::Union{Missing,Vector{String}},
confusion_matrix::Union{Missing,Array{Int64}} = vec_to_mat(confusion_matrix),
discrimination_calibration_curve::Union{Missing,
Tuple{Vector{Float64},
Vector{Float64}}},
discrimination_calibration_score::Union{Missing,Float64},
multiclass_IRA_kappas::Union{Missing,Float64},
multiclass_kappa::Union{Missing,Float64},
optimal_threshold::Union{Missing,Float64},
optimal_threshold_class::Union{Missing,Int64},
per_class_IRA_kappas::Union{Missing,Vector{Float64}},
per_class_kappas::Union{Missing,Vector{Float64}},
stratified_kappas::Union{Missing,
Vector{NamedTuple{(:per_class,
:multiclass,
:n),
Tuple{Vector{Float64},
Float64,
Int64}}}},
per_class_pr_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_class_reliability_calibration_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_class_reliability_calibration_scores::Union{Missing,
Vector{Float64}},
per_class_roc_aucs::Union{Missing,Vector{Float64}},
per_class_roc_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_expert_discrimination_calibration_curves::Union{Missing,
Vector{Tuple{Vector{Float64},
Vector{Float64}}}},
per_expert_discrimination_calibration_scores::Union{Missing,
Vector{Float64}},
spearman_correlation::Union{Missing,
NamedTuple{(:ρ, :n,
:ci_lower,
:ci_upper),
Tuple{Float64,
Int64,
Float64,
Float64}}},
thresholds::Union{Missing,Vector{Float64}})

function Legolas.Row{S}(evaluation_row_dict::Dict) where {S<:Legolas.Schema{Symbol("lighthouse.evaluation"),
1}}
row = (; (Symbol(k) => v for (k, v) in pairs(evaluation_row_dict))...)
return EvaluationRow(row)
end

"""
_evaluation_row_dict(row::EvaluationRow) -> Dict{String,Any}
Convert an [`EvaluationRow`](@ref) into a `Dict{String,Any}` of results, matching the output
of [`evaluation_metrics`](@ref) (the format that predates the use of `EvaluationRow` in
Lighthouse <v0.14.0).
"""
function _evaluation_row_dict(row::EvaluationRow)
return Dict(string(k) => v for (k, v) in pairs(NamedTuple(row)) if !ismissing(v))
end
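A sketch of the conversion in both directions, relying only on the `Dict` constructor and `_evaluation_row_dict` documented above (field values are placeholders):

    legacy = Dict{String,Any}("multiclass_kappa" => 0.8, "class_labels" => ["a", "b"])
    row = EvaluationRow(legacy)            # pre-v0.14-style Dict -> Legolas row
    Lighthouse._evaluation_row_dict(row)   # row -> Dict, with `missing` fields dropped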

2 comments on commit 9e9de96

@hannahilea
Contributor


@JuliaRegistrator


Registration pull request created: JuliaRegistries/General/56874

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.14.0 -m "<description of version>" 9e9de969cf711a191589beb0cb1f556c2f4cf8b9
git push origin v0.14.0
