Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use python convolutions also for positivity predictions #1510

Merged
merged 6 commits into from
Feb 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 0 additions & 24 deletions validphys2/src/validphys/convolution.py
Original file line number Diff line number Diff line change
Expand Up @@ -397,27 +397,3 @@ def central_dis_predictions(loaded_fk, pdf):
observables."""
gv = functools.partial(evolution.central_grid_values, pdf=pdf)
return _gv_dis_predictions(loaded_fk, gv)


def _positivity_predictions(posdataset, pdf, fkfunc):
    """Implementation of :py:func:`_predictions` but for positivity
    datasets.

    Parameters
    ----------
    posdataset :
        positivity dataset spec; only its single ``fkspec`` attribute is
        read here and passed through :py:func:`load_fktable`
    pdf :
        PDF object forwarded unchanged to ``fkfunc``
    fkfunc : callable
        one of the ``*_fk_predictions`` convolution functions, applied to
        the loaded fktable and the pdf

    Returns
    -------
    whatever ``fkfunc`` returns for the loaded fktable and ``pdf``
    """
    return fkfunc(load_fktable(posdataset.fkspec), pdf)


def positivity_predictions(posdataset, pdf):
    """Implementation of :py:func:`predictions` but for positivity
    datasets."""
    # Load the (single) fktable of the positivity set and convolve it
    # with the full per-replica prediction function.
    fktable = load_fktable(posdataset.fkspec)
    return fk_predictions(fktable, pdf)


def linear_positivity_predictions(posdataset, pdf):
    """Implementation of :py:func:`linear_predictions` but for positivity
    datasets.

    Delegates to :py:func:`_positivity_predictions` with the linearised
    convolution function :py:func:`linear_fk_predictions`.
    """
    return _positivity_predictions(posdataset, pdf, linear_fk_predictions)


def central_positivity_predictions(posdataset, pdf):
    """Implementation as :py:func:`central_predictions`, but for positivity
    datasets.

    Delegates to :py:func:`_positivity_predictions` with the central-value
    convolution function :py:func:`central_fk_predictions`.
    """
    return _positivity_predictions(posdataset, pdf, central_fk_predictions)
29 changes: 15 additions & 14 deletions validphys2/src/validphys/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -552,26 +552,27 @@ def __init__(self, fkpath, cfactors):
def load(self):
return FKTable(str(self.fkpath), [str(factor) for factor in self.cfactors])

class PositivitySetSpec(TupleComp):
def __init__(self, name ,commondataspec, fkspec, maxlambda, thspec):
self.name = name
self.commondataspec = commondataspec
self.fkspec = fkspec
self.maxlambda = maxlambda
self.thspec = thspec
super().__init__(name, commondataspec, fkspec, maxlambda, thspec)


class PositivitySetSpec(DataSetSpec):
"""Extends DataSetSpec to work around the particularities of the positivity datasets"""

def __str__(self):
return self.name
def __init__(self, name, commondataspec, fkspec, maxlambda, thspec):
cuts = Cuts(commondataspec, None)
self.maxlambda = maxlambda
super().__init__(name=name, commondata=commondataspec, fkspecs=fkspec, thspec=thspec, cuts=cuts)
if len(self.fkspecs) > 1:
# The signature of the function does not accept operations either
# so more than one fktable cannot be utilized
raise ValueError("Positivity datasets can only contain one fktable")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why?

Copy link
Member Author

@scarlehoff scarlehoff Feb 3, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't know why. But it seems to be the case (it is even called fkspec instead of fkspecs for positivity)


@functools.lru_cache()
def load(self):
cd = self.commondataspec.load()
fk = self.fkspec.load()
cd = self.commondata.load()
fk = self.fkspecs[0].load()
return PositivitySet(cd, fk, self.maxlambda)

def to_unweighted(self):
log.warning("Trying to unweight %s, PositivitySetSpec are always unweighted", self.name)
return self


#We allow to expand the experiment as a list of datasets
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/dataplots.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,7 +1000,7 @@ def plot_positivity(pdfs, positivity_predictions_for_pdfs, posdataset, pos_use_k
def _check_same_posdataset_name(dataspecs_posdataset):
"""Check that the ``posdataset`` key matches for ``dataspecs``"""
_check_same_dataset_name.__wrapped__(
[ds.commondataspec for ds in dataspecs_posdataset]
[ds.commondata for ds in dataspecs_posdataset]
)

@figure
Expand Down
4 changes: 1 addition & 3 deletions validphys2/src/validphys/results.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,9 +171,7 @@ def from_convolution(cls, pdf, dataset):
class PositivityResult(StatsResult):
@classmethod
def from_convolution(cls, pdf, posset):
loaded_pdf = pdf.legacy_load()
loaded_pos = posset.load()
data = loaded_pos.GetPredictions(loaded_pdf)
data = predictions(posset, pdf)
stats = pdf.stats_class(data.T)
return cls(stats)

Expand Down
2 changes: 2 additions & 0 deletions validphys2/src/validphys/tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ def tmp(tmpdir):
{'dataset': 'NMC', 'weight': 100},
]

POSITIVITIES = ["POSDYCBD", "POSF2S"]

PDF = "NNPDF40_nnlo_as_01180"
HESSIAN_PDF = "NNPDF40_nnlo_as_01180_hessian"
THEORYID = 162
Expand Down
2 changes: 1 addition & 1 deletion validphys2/src/validphys/tests/test_commondataparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_basic_commondata_loading():

# Test a dataset with no systematics
emptysyscd = l.check_posset(theoryID=THEORYID, setname='POSDYCBD', postlambda=1e-10)
emptysysres = load_commondata(emptysyscd.commondataspec)
emptysysres = load_commondata(emptysyscd.commondata)
assert emptysysres.nsys == 0
assert emptysysres.systype_table.empty is True

Expand Down
92 changes: 63 additions & 29 deletions validphys2/src/validphys/tests/test_pyfkdata.py
Original file line number Diff line number Diff line change
@@ -1,78 +1,112 @@
import pytest
import pandas as pd
import numpy as np
from numpy.testing import assert_allclose

from validphys.api import API
from validphys.loader import Loader
from validphys.results import ThPredictionsResult
from validphys.results import ThPredictionsResult, PositivityResult
from validphys.fkparser import load_fktable
from validphys.convolution import predictions, central_predictions, linear_predictions
from validphys.tests.conftest import PDF, THEORYID
from validphys.tests.conftest import PDF, HESSIAN_PDF, THEORYID, POSITIVITIES


def test_basic_loading():
l = Loader()
# Test both with and without cfactors, and load both DIS and hadronic
for cfac in ((), ('QCD',)):
fk = l.check_fktable(setname='ATLASTTBARTOT', theoryID=THEORYID, cfac=cfac)
for cfac in ((), ("QCD",)):
fk = l.check_fktable(setname="ATLASTTBARTOT", theoryID=THEORYID, cfac=cfac)
res = load_fktable(fk)
assert res.ndata == 3
assert isinstance(res.sigma, pd.DataFrame)
fk = l.check_fktable(setname='H1HERAF2B', theoryID=THEORYID, cfac=())
fk = l.check_fktable(setname="H1HERAF2B", theoryID=THEORYID, cfac=())
res = load_fktable(fk)
assert res.ndata == 12
assert isinstance(res.sigma, pd.DataFrame)

# Check if cfactors for datasets having one entry are correctly parsed
fk = l.check_fktable(setname='CMSTTBARTOT7TEV', theoryID=THEORYID, cfac=('QCD',))
fk = l.check_fktable(setname="CMSTTBARTOT7TEV", theoryID=THEORYID, cfac=("QCD",))
res = load_fktable(fk)
assert res.ndata == 1


def test_cuts():
l = Loader()
ds = l.check_dataset('ATLASTTBARTOT', theoryid=THEORYID, cfac=('QCD',))
ds = l.check_dataset("ATLASTTBARTOT", theoryid=THEORYID, cfac=("QCD",))
table = load_fktable(ds.fkspecs[0])
# Check explicit cuts
newtable = table.with_cuts([0, 1])
assert set(newtable.sigma.index.get_level_values(0)) == {0, 1}
assert newtable.ndata == 2
assert newtable.metadata['GridInfo'].ndata == ds.commondata.ndata
assert newtable.metadata["GridInfo"].ndata == ds.commondata.ndata
# Check empty cuts
assert newtable.with_cuts(None) is newtable
# Check loaded cuts
ds = l.check_dataset('H1HERAF2B', theoryid=THEORYID)
ds = l.check_dataset("H1HERAF2B", theoryid=THEORYID)
table = load_fktable(ds.fkspecs[0])
newtable = table.with_cuts(ds.cuts)
assert len(newtable.sigma.index.get_level_values(0).unique()) == len(ds.cuts.load())


def test_predictions():
@pytest.mark.parametrize("pdf_name", [PDF, HESSIAN_PDF])
def test_predictions(pdf_name):
"""Test that the ThPredictionsResult class do not break the raw predictions
coming from the convolution module and that they are compatible with the API result"""
l = Loader()
pdf = l.check_pdf(PDF)
dss = [
l.check_dataset(
'ATLASTTBARTOT', theoryid=THEORYID, cfac=('QCD',)
), # cfactors
l.check_dataset('H1HERAF2B', theoryid=THEORYID), # DIS, op: NULL
l.check_dataset('D0ZRAP', theoryid=THEORYID), # op: RATIO
l.check_dataset('D0WEASY', theoryid=THEORYID), # op: ASY
l.check_dataset('CMSWCHARMTOT', theoryid=THEORYID), # op: ADD
l.check_dataset('ATLASWPT31PB', theoryid=THEORYID), # op: SMN
l.check_dataset('DYE906R', theoryid=THEORYID), # op: COM
l.check_dataset('DYE906_D', theoryid=THEORYID), # op: SMT
pdf = l.check_pdf(pdf_name)
datasets = [
{"name": "ATLASTTBARTOT", "cfac": ("QCD",)}, # cfactors
{"name": "H1HERAF2B"}, # DIS, op: NULL
{"name": "D0ZRAP"}, # op: RATIO
{"name": "D0WEASY"}, # op: ASY
{"name": "CMSWCHARMTOT"}, # op: ADD
{"name": "ATLASWPT31PB"}, # op: SMN
{"name": "DYE906R"}, # op: COM <----
{"name": "DYE906_D"}, # op: SMT <----
]
for ds in dss:
for daset in datasets:
ds = l.check_dataset(**daset, theoryid=THEORYID)
preds = predictions(ds, pdf)
cppres = ThPredictionsResult.from_convolution(pdf, ds)
# Change the atol and rtol from 1e-8 and 1e-7 since DYE906R
# fails with the default setting
assert_allclose(preds.values, cppres._rawdata, atol=1e-8, rtol=1e-3)
core_predictions = ThPredictionsResult.from_convolution(pdf, ds)
assert_allclose(preds.values, core_predictions._rawdata, atol=1e-8, rtol=1e-3)
# Now check that the stats class does the right thing with the data
cv_predictions = central_predictions(ds, pdf).squeeze()
stats_predictions = pdf.stats_class(preds.T)
# rtol to 1e-2 due to DYE906R and DYE906_D for MC sets
# TODO: check whether this tolerance can be decreased when using double precision
assert_allclose(cv_predictions, stats_predictions.central_value(), rtol=1e-2)


@pytest.mark.parametrize("pdf_name", [PDF, HESSIAN_PDF])
def test_positivity(pdf_name):
    """Check that positivity predictions are self-consistent.

    For every positivity set, the raw convolution must agree with the
    value wrapped by ``PositivityResult`` and with the prediction
    obtained through the API, for both MC and Hessian PDFs.
    """
    loader = Loader()
    loaded_pdf = loader.check_pdf(pdf_name)
    for setname in POSITIVITIES:
        pos_spec = loader.check_posset(setname=setname, theoryID=THEORYID, postlambda=1e6)
        # Raw python convolution: the reference value for all checks below
        raw_preds = predictions(pos_spec, loaded_pdf)
        # Wrapping in PositivityResult must not change the numbers
        wrapped = PositivityResult.from_convolution(loaded_pdf, pos_spec)
        assert_allclose(raw_preds.values, wrapped.rawdata.T)
        # Going through the API must reproduce them as well
        via_api = API.positivity_predictions_data_result(
            theoryid=THEORYID, pdf=pdf_name, posdataset={"dataset": setname, "maxlambda": 1e6}
        )
        assert_allclose(raw_preds.values, via_api.rawdata.T)
        # The central prediction must match the wrapped central value
        central = central_predictions(pos_spec, loaded_pdf).squeeze()
        assert_allclose(central, via_api.central_value, atol=1e-3)


def test_extended_predictions():
"""Test the python predictions dataframe stasts with MC sets"""
l = Loader()
pdf = l.check_pdf(PDF)
had = l.check_dataset('ATLASTTBARTOT', theoryid=THEORYID, cfac=('QCD',))
dis = l.check_dataset('H1HERAF2B', theoryid=THEORYID)
had = l.check_dataset("ATLASTTBARTOT", theoryid=THEORYID, cfac=("QCD",))
dis = l.check_dataset("H1HERAF2B", theoryid=THEORYID)
dis_all = predictions(dis, pdf).T
dis_central = central_predictions(dis, pdf).T
assert np.allclose(dis_all.mean().values, dis_central.values)
Expand Down