Skip to content

Commit

Permalink
Merge pull request #55 from jarq6c/continuous_metrics
Browse files Browse the repository at this point in the history
Add MSE and NSE methods to metrics subpackage
  • Loading branch information
jarq6c authored Apr 1, 2021
2 parents 062bc63 + b4857be commit 9470d0e
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 4 deletions.
90 changes: 88 additions & 2 deletions python/metrics/evaluation_tools/metrics/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,98 @@
- percent_correct
- base_chance
- equitable_threat_score
- mean_squared_error
- nash_sutcliffe_efficiency
"""

import numpy as np
import numpy.typing as npt
import pandas as pd
from typing import Union

def mean_squared_error(
    y_true: npt.ArrayLike,
    y_pred: npt.ArrayLike,
    root: bool = False
) -> float:
    """Compute the mean squared error, or optionally root mean squared error.

    The squared differences are averaged over every element, so inputs of
    shape (n_samples, n_outputs) yield a single value averaged across all
    samples and all outputs.

    Parameters
    ----------
    y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values, also called observations,
        measurements, or observed values.
    y_pred: array-like, broadcastable against y_true
        Estimated target values, also called simulations or modeled values.
        A scalar is accepted and compared against every element of y_true.
    root: bool, default False
        When True, return the root mean squared error.

    Returns
    -------
    error: float
        Mean squared error or root mean squared error.
    """
    # Element-wise squared residuals; numpy broadcasting handles a scalar
    # y_pred (e.g. comparing observations against their own mean).
    squared_errors = np.subtract(y_true, y_pred) ** 2.0

    # Divide by the number of residuals rather than len(y_true): len() only
    # counts rows, which inflates the result for 2-D input and fails outright
    # for scalar input.
    MSE = np.sum(squared_errors) / np.size(squared_errors)

    # Return MSE, optionally return root mean squared error
    if root:
        return np.sqrt(MSE)
    return MSE

def nash_sutcliffe_efficiency(
    y_true: npt.ArrayLike,
    y_pred: npt.ArrayLike,
    log: bool = False,
    normalized: bool = False
) -> float:
    """Compute the Nash–Sutcliffe model efficiency coefficient (NSE).

    The NSE is also known as the mean squared error skill score or the
    R^2 (coefficient of determination) regression score. It is computed as
    one minus the ratio of the model's mean squared error to the mean
    squared error of a baseline that always predicts the mean of y_true.

    Parameters
    ----------
    y_true: array-like of shape (n_samples,) or (n_samples, n_outputs)
        Ground truth (correct) target values, also called observations,
        measurements, or observed values.
    y_pred: array-like
        Estimated target values, also called simulations or modeled values.
    log: bool, default False
        Apply numpy.log (natural logarithm) to y_true and y_pred
        before computing the NSE.
    normalized: bool, default False
        When True, normalize the final NSE value using the method from
        Nossent & Bauwens, 2012.

    Returns
    -------
    score: float
        Nash–Sutcliffe model efficiency coefficient

    References
    ----------
    Nash, J. E., & Sutcliffe, J. V. (1970). River flow forecasting through
        conceptual models part I—A discussion of principles. Journal of
        hydrology, 10(3), 282-290.
    Nossent, J., & Bauwens, W. (2012, April). Application of a normalized
        Nash-Sutcliffe efficiency to improve the accuracy of the Sobol'
        sensitivity analysis of a hydrological model. In EGU General Assembly
        Conference Abstracts (p. 237).
    """
    # Work in log space when requested
    if log:
        y_true, y_pred = np.log(y_true), np.log(y_pred)

    # Model error relative to the error of the "always predict the
    # observed mean" baseline
    model_error = mean_squared_error(y_true, y_pred)
    baseline_error = mean_squared_error(y_true, np.mean(y_true))
    error_ratio = model_error / baseline_error

    # Normalized form (Nossent & Bauwens, 2012), otherwise the classic NSE
    return 1.0 / (1.0 + error_ratio) if normalized else 1.0 - error_ratio

def compute_contingency_table(
observed: pd.Series,
simulated: pd.Series,
Expand All @@ -38,9 +124,9 @@ def compute_contingency_table(
Parameters
----------
observed: pandas.Series, required
pandas.Series of boolean pandas.Categorical values indicating observed occurences
pandas.Series of boolean pandas.Categorical values indicating observed occurrences
simulated: pandas.Series, required
pandas.Series of boolean pandas.Categorical values indicating simulated occurences
pandas.Series of boolean pandas.Categorical values indicating simulated occurrences
true_positive_key: str, optional, default 'true_positive'
Label to use for true positives.
false_positive_key: str, optional, default 'false_positive'
Expand Down
2 changes: 1 addition & 1 deletion python/metrics/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
SUBPACKAGE_SLUG = f"{NAMESPACE_PACKAGE_NAME}.{SUBPACKAGE_NAME}"

# Subpackage version
VERSION = "0.1.1+1"
VERSION = "0.1.2+1"

# Package author information
AUTHOR = "Jason Regina"
Expand Down
28 changes: 28 additions & 0 deletions python/metrics/tests/test_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pandas as pd
from math import isclose
import numpy as np

contigency_table = {
'true_positive': 1,
Expand All @@ -18,6 +19,9 @@
'TN': 4
}

# Shared inputs for the continuous-metric tests (mean squared error and
# Nash-Sutcliffe efficiency); y_pred is y_true in reverse order.
y_true = [1., 2., 3., 4.]
y_pred = [4., 3., 2., 1.]

def test_compute_contingency_table():
obs = pd.Categorical([True, False, False, True, True, True,
False, False, False, False])
Expand Down Expand Up @@ -132,3 +136,27 @@ def test_equitable_threat_score():
true_negative_key='TN'
)
assert isclose(ETS, (-0.2/4.8), abs_tol=0.000001)

def test_mean_squared_error():
    """Check MSE and RMSE against hand-computed values for the shared inputs."""
    # Residuals are (-3, -1, 1, 3), so the squared errors average to 5.
    # Compare with a tolerance rather than exact float equality, matching
    # the other floating point assertions in this module.
    MSE = metrics.mean_squared_error(y_true, y_pred)
    assert isclose(MSE, 5.0, abs_tol=0.000001)

    RMSE = metrics.mean_squared_error(y_true, y_pred, root=True)
    assert isclose(RMSE, np.sqrt(5.0), abs_tol=0.000001)

def test_nash_sutcliffe_efficiency():
    """Check plain, normalized, and log-transformed NSE for the shared inputs."""
    # MSE(model) = 5 and MSE(mean baseline) = 1.25, so NSE = 1 - 4 = -3
    NSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred)
    assert isclose(NSE, -3.0, abs_tol=0.000001)

    # Normalized NSE = 1 / (1 + 4) = 0.2
    NNSE = metrics.nash_sutcliffe_efficiency(y_true, y_pred,
        normalized=True)
    assert isclose(NNSE, 0.2, abs_tol=0.000001)

    # Exponentiating the inputs and requesting log=True should reproduce the
    # untransformed scores; the exp/log round trip is not guaranteed to be
    # bit-exact, so compare with a tolerance.
    NSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true),
        np.exp(y_pred), log=True)
    assert isclose(NSEL, -3.0, abs_tol=0.000001)

    NNSEL = metrics.nash_sutcliffe_efficiency(np.exp(y_true),
        np.exp(y_pred), log=True, normalized=True)
    assert isclose(NNSEL, 0.2, abs_tol=0.000001)
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
MAINTAINER_EMAIL = "arthur.raney@noaa.gov"

# Namespace package version
VERSION = "1.3.4+1"
VERSION = "1.3.5+1"
URL = "https://github.com/NOAA-OWP/evaluation_tools"

# Map subpackage namespace to relative location
Expand Down

0 comments on commit 9470d0e

Please sign in to comment.