Fixing a bug in the MUL / ADD treatment when writing down closure data #2045

Merged
17 changes: 12 additions & 5 deletions validphys2/src/validphys/coredata.py
@@ -2,6 +2,7 @@
 Data containers backed by Python managed memory (Numpy arrays and Pandas
 dataframes).
 """
+
 import dataclasses
 import logging
 from typing import Optional
@@ -397,24 +398,29 @@ def export_uncertainties(self, buffer):
         """Exports the uncertainties defined by this commondata instance to the given buffer"""
         definitions = {}
         for idx, row in self.systype_table.iterrows():
-            definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]}
+            if row["name"] != "SKIP":
+                definitions[f"sys_{idx}"] = {"treatment": row["treatment"], "type": row["name"]}
+
+        # Order the definitions by treatment as ADD, MULT
+        # TODO: make it so that it corresponds to the original order exactly
+        sorted_definitions = {
+            k: v for k, v in sorted(definitions.items(), key=lambda item: item[1]["treatment"])
+        }
         bins = []
         for idx, row in self.systematic_errors().iterrows():
             tmp = {"stat": float(self.stat_errors[idx])}
             # Hope things come in the right order...
-            for key_name, val in zip(definitions, row):
+            for key_name, val in zip(sorted_definitions, row):
                 tmp[key_name] = float(val)
 
             bins.append(tmp)
 
-        definitions["stat"] = {
+        sorted_definitions["stat"] = {
             "description": "Uncorrelated statistical uncertainties",
             "treatment": "ADD",
             "type": "UNCORR",
         }
 
-        ret = {"definitions": definitions, "bins": bins}
+        ret = {"definitions": sorted_definitions, "bins": bins}
         yaml.safe_dump(ret, buffer)
 
     def export(self, folder_path):
@@ -430,3 +436,4 @@ def export(self, folder_path):
         # Export data and uncertainties
         self.export_data(data_path.open("w", encoding="utf-8"))
         self.export_uncertainties(unc_path.open("w", encoding="utf-8"))
+        return data_path, unc_path
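
For context on the fix above: Python sorts the treatment strings lexicographically, so every "ADD" entry ends up before every "MULT" entry, and sorted() is stable, so the original order is preserved within each group. A minimal standalone sketch of that reordering (plain Python; the systype names and values are invented for illustration):

    # Hypothetical systematics mixing the two treatments
    definitions = {
        "sys_0": {"treatment": "MULT", "type": "CORR"},
        "sys_1": {"treatment": "ADD", "type": "UNCORR"},
        "sys_2": {"treatment": "MULT", "type": "CORR"},
    }

    # "ADD" < "MULT" lexicographically, so all additive systematics are
    # grouped before the multiplicative ones, keeping relative order within
    # each group
    sorted_definitions = {
        k: v for k, v in sorted(definitions.items(), key=lambda item: item[1]["treatment"])
    }

    assert list(sorted_definitions) == ["sys_1", "sys_0", "sys_2"]

This ADD-then-MULT grouping is the ordering that the "Order the definitions by treatment" comment in the diff refers to, and it is what the zip over sorted_definitions in the bins loop relies on.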
54 changes: 48 additions & 6 deletions validphys2/src/validphys/tests/test_commondataparser.py
@@ -1,10 +1,12 @@
-import pytest
 import numpy as np
 import pandas as pd
+import pytest
 
 from validphys.api import API
 from validphys.commondataparser import load_commondata
+from validphys.covmats_utils import construct_covmat
 from validphys.loader import FallbackLoader as Loader
-from validphys.tests.conftest import THEORYID_NEW, FIT
+from validphys.tests.conftest import FIT, THEORYID_NEW
 
 
 def test_basic_commondata_loading():
@@ -19,7 +21,9 @@ def test_basic_commondata_loading():
     assert isinstance(res.systype_table, pd.DataFrame)
 
     # Test a dataset with no systematics
-    emptysyscd = l.check_posset(theoryID=THEORYID_NEW, setname='NNPDF_POS_2P24GEV_XDQ', postlambda=1e-10)
+    emptysyscd = l.check_posset(
+        theoryID=THEORYID_NEW, setname='NNPDF_POS_2P24GEV_XDQ', postlambda=1e-10
+    )
     emptysysres = load_commondata(emptysyscd.commondata)
     assert emptysysres.nsys == 0
     assert emptysysres.systype_table.empty is True
@@ -33,9 +37,7 @@ def test_commondata_with_cuts():
     loaded_cd = load_commondata(cd)
 
     fit_cuts = l.check_fit_cuts(fit=FIT, commondata=cd)
-    internal_cuts = l.check_internal_cuts(
-        cd, API.rules(theoryid=THEORYID_NEW, use_cuts="internal")
-    )
+    internal_cuts = l.check_internal_cuts(cd, API.rules(theoryid=THEORYID_NEW, use_cuts="internal"))
 
     loaded_cd_fit_cuts = loaded_cd.with_cuts(fit_cuts)
     # We must do these - 1 subtractions due to the fact that cuts indexing
@@ -61,3 +63,43 @@ def test_commondata_with_cuts():
     bad_cuts = l.check_fit_cuts(fit=FIT, commondata=cd_bad)
     with pytest.raises(ValueError):
         loaded_cd.with_cuts(bad_cuts)
+
+
+def test_commondata_load_write_load(tmp):
+    """Test that we can load a commondata set, write it down, and load it again"""
+    l = Loader()
+
+    # Select a dataset that we know mixes ADD and MULT (so that the ordering is checked)
+    setname = "ATLAS_2JET_7TEV_R06_M12Y"
+    # And a complicated variant
+    variant = "legacy"
+
+    # Get a reference to the commondata
+    cd = l.check_commondata(setname=setname, variant=variant)
+
+    # Load it up, save the covmat, and write it down
+    original_data = cd.load()
+    data_path, unc_path = original_data.export(tmp)
+
+    # Now, reload it with the new data/unc paths
+    new_data = cd.with_modified_data(data_path, uncertainties_file=unc_path).load()
+
+    # Central values!
+    original_cv = original_data.central_values.to_numpy()
+    new_cv = new_data.central_values.to_numpy()
+    np.testing.assert_allclose(original_cv, new_cv)
+
+    # Stats!
+    original_stats = original_data.stat_errors.to_numpy()
+    new_stats = new_data.stat_errors.to_numpy()
+    np.testing.assert_allclose(original_stats, new_stats)
+
+    # Create fake data in order to check whether the covmats are truly the same.
+    # The fake data ensures that MULT and ADD are treated in the same way in both;
+    # otherwise, since the data is saved wrt the original central value, the test would always pass.
+    fake_unc = np.diag(construct_covmat(original_stats, original_data.systematic_errors()))
+    fake_data = np.random.rand(len(original_stats)) * fake_unc + original_cv
+
+    new_covmat = construct_covmat(new_stats, new_data.systematic_errors(fake_data))
+    original_covmat = construct_covmat(original_stats, original_data.systematic_errors(fake_data))
+    np.testing.assert_allclose(new_covmat, original_covmat)
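
A note on why the fake data is needed: a multiplicative uncertainty is stored relative to the central value, so a covariance matrix rebuilt at the original central values agrees by construction even if a MULT systematic was written out with the wrong treatment. Evaluating the systematics at shifted central values is what exposes such a mix-up. A standalone numpy sketch of the idea (invented numbers, not the validphys API):

    import numpy as np

    # One systematic, stored on disk as the absolute value it had at the
    # original central values (here, 5% of cv_orig)
    cv_orig = np.array([10.0, 20.0])
    abs_on_disk = np.array([0.5, 1.0])

    def as_absolute(treatment, central):
        """Absolute uncertainty implied by the stored value for a given treatment."""
        if treatment == "ADD":
            return abs_on_disk                   # additive: independent of the data
        return abs_on_disk / cv_orig * central   # multiplicative: rescales with the data

    # At the original central values a mislabelled treatment is invisible...
    assert np.allclose(as_absolute("ADD", cv_orig), as_absolute("MULT", cv_orig))
    # ...but at shifted (fake) central values it shows up immediately
    cv_fake = 1.1 * cv_orig
    assert not np.allclose(as_absolute("ADD", cv_fake), as_absolute("MULT", cv_fake))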