From 2bbff1aa829e410739b320426ab27135d374019e Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Tue, 17 Sep 2024 11:00:37 +0200
Subject: [PATCH] minimal modifications so that tests and examples run without
 old commondata, remove the option to force reading an old commondata

---
 doc/sphinx/source/data/data-config.rst        |   4 +-
 nnpdf_data/nnpdf_data/__init__.py             |  24 ++-
 nnpdf_data/pyproject.toml                     |   4 +-
 pyproject.toml                                |   3 +
 .../examples/data_theory_comparison.yaml      |   6 +-
 validphys2/examples/dataspecs.yaml            |   6 +-
 validphys2/examples/future_test_example.yaml  | 149 +++++++++---------
 validphys2/examples/generate_a_report.yaml    |   8 +-
 validphys2/examples/spiderplot_dataspecs.yaml |   2 +-
 validphys2/src/validphys/commondataparser.py  |   2 +-
 validphys2/src/validphys/coredata.py          |   2 +-
 validphys2/src/validphys/loader.py            | 137 +++-------------
 validphys2/src/validphys/tests/conftest.py    |   6 +-
 validphys2/src/validphys/tests/test_loader.py |   5 +-
 .../src/validphys/tests/test_pseudodata.py    |   2 +-
 .../src/validphys/tests/test_pyfkdata.py      |  91 ++++------
 16 files changed, 164 insertions(+), 287 deletions(-)

diff --git a/doc/sphinx/source/data/data-config.rst b/doc/sphinx/source/data/data-config.rst
index ae44d3a6fa..8d249a56bc 100644
--- a/doc/sphinx/source/data/data-config.rst
+++ b/doc/sphinx/source/data/data-config.rst
@@ -19,7 +19,7 @@ Experimental data storage
 The central repository for ``CommonData`` in use by ``nnpdf`` projects is
 located in the ``nnpdf`` git repository at
 
-    ``nnpdf_data/nnpdf_data/new_commondata``
+    ``nnpdf_data/nnpdf_data/commondata``
 
 where a separate ``CommonData`` file is stored for each *Dataset* with the
 filename format described in :ref:`dataset-naming-convention`.
@@ -45,7 +45,7 @@ The following lines will check whether a newly added theory can be read by valid
 (change 700 by the id of your newly added theory).
 
 .. code-block:: python
-
+
     from nnpdf_data import theory_cards
     from nnpdf_data.theorydbutils import fetch_theory
     theory = fetch_theory(theory_cards, 700)
diff --git a/nnpdf_data/nnpdf_data/__init__.py b/nnpdf_data/nnpdf_data/__init__.py
index 70e91afdad..b7f2a72295 100644
--- a/nnpdf_data/nnpdf_data/__init__.py
+++ b/nnpdf_data/nnpdf_data/__init__.py
@@ -6,13 +6,18 @@ from ._version import __version__
 
 path_vpdata = pathlib.Path(__file__).parent
-path_commondata = path_vpdata / "new_commondata"
+path_commondata = path_vpdata / "commondata"
 
 # VP should not have access to this file, only to the products
 _path_legacy_mapping = path_commondata / "dataset_names.yml"
-legacy_to_new_mapping = yaml.YAML().load(_path_legacy_mapping)
 theory_cards = path_vpdata / "theory_cards"
 
+_legacy_to_new_mapping_raw = yaml.YAML().load(_path_legacy_mapping)
+# Convert strings into a dictionary
+legacy_to_new_mapping = {
+    k: ({"dataset": v} if isinstance(v, str) else v) for k, v in _legacy_to_new_mapping_raw.items()
+}
+
 
 @lru_cache
 def legacy_to_new_map(dataset_name, sys=None):
@@ -22,13 +27,6 @@ def legacy_to_new_map(dataset_name, sys=None):
         return dataset_name, None
 
     new_name = legacy_to_new_mapping[dataset_name]
-    if isinstance(new_name, str):
-        if sys is not None:
-            raise KeyError(
-                f"I cannot translate the combination of {dataset_name} and sys: {sys}. Please report this."
-            )
-        return new_name, None
-
     variant = new_name.get("variant")
     new_name = new_name["dataset"]
     if sys is not None:
@@ -48,11 +46,9 @@ def new_to_legacy_map(dataset_name, variant_used):
 
     # we can have 2 old dataset mapped to the same new one
     possible_match = None
-    for old_name, new_name in legacy_to_new_mapping.items():
-        variant = None
-        if not isinstance(new_name, str):
-            variant = new_name.get("variant")
-            new_name = new_name["dataset"]
+    for old_name, new_info in legacy_to_new_mapping.items():
+        new_name = new_info["dataset"]
+        variant = new_info.get("variant")
 
         if new_name == dataset_name:
             if variant_used == variant:
diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml
index ab828cf652..70d9749b24 100644
--- a/nnpdf_data/pyproject.toml
+++ b/nnpdf_data/pyproject.toml
@@ -22,8 +22,8 @@ repository = "https://github.com/NNPDF/nnpdf_data"
 
 # Exclude intermediate data files
 exclude = [
-  "nnpdf_data/new_commondata/*/rawdata",
-  "nnpdf_data/new_commondata/*/*.py",
+  "nnpdf_data/commondata/*/rawdata",
+  "nnpdf_data/commondata/*/*.py",
 ]
 # Data files
 include = [
diff --git a/pyproject.toml b/pyproject.toml
index cb3809b892..104b979d69 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -140,3 +140,6 @@ profile = "black" # https://black.readthedocs.io/en/stable/guides/using_black_wi
 skip_gitignore = true
 known_first_party = ["validphys", "eko", "n3fit", "nnpdf_data", "reportengine"]
 force_sort_within_sections = true
+
+[tool.pytest.ini_options]
+addopts = "--disable-warnings"
diff --git a/validphys2/examples/data_theory_comparison.yaml b/validphys2/examples/data_theory_comparison.yaml
index d726aa7f9b..f390b6d377 100644
--- a/validphys2/examples/data_theory_comparison.yaml
+++ b/validphys2/examples/data_theory_comparison.yaml
@@ -4,10 +4,10 @@ meta:
   author: Lazy Person
 
 pdfs:
-  - id: NNPDF40_nnlo_lowprecision
-    label: NNPDF40_nnlo_lowprecision
+  - id: NNPDF40_nnlo_low_precision
+    label: NNPDF40_nnlo_low_precision
 
-theoryid: 162
+theoryid: 399
 
 use_cuts: "internal"
diff --git a/validphys2/examples/dataspecs.yaml b/validphys2/examples/dataspecs.yaml
index 54d9c2c881..a45d5ab93e 100644
--- a/validphys2/examples/dataspecs.yaml
+++ b/validphys2/examples/dataspecs.yaml
@@ -26,10 +26,10 @@ dataspecs:
     speclabel: "NLO"
     fit: NNPDF40_nlo_as_01180
 
-  - theoryid: 162
-    pdf: NNPDF40_nnlo_lowprecision
+  - theoryid: 399
+    pdf: NNPDF40_nnlo_low_precision_240916
     speclabel: "NNLO"
-    fit: NNPDF40_nnlo_lowprecision
+    fit: NNPDF40_nnlo_low_precision_240916
 
 
 template_text: |
diff --git a/validphys2/examples/future_test_example.yaml b/validphys2/examples/future_test_example.yaml
index abd79e5cd9..108360c6f3 100644
--- a/validphys2/examples/future_test_example.yaml
+++ b/validphys2/examples/future_test_example.yaml
@@ -12,8 +12,8 @@ my_description:
   from_: meta
 
 future:
-  pdf: {id: 210219-01-rs-nnpdf40-baseline, label: "NNPDF4.0"}
-  fit: {id: 210219-01-rs-nnpdf40-baseline, label: "NNPDF4.0"}
+  pdf: {id: 240516-jcm-nnpdf40-like, label: "NNPDF4.0"}
+  fit: {id: 240516-jcm-nnpdf40-like, label: "NNPDF4.0"}
   speclabel: "NNPDF4.0 fit"
 
 description:
@@ -41,77 +41,80 @@ past:
     from_: fit
 
 dataset_inputs:
-- {dataset: NMCPD_dw, custom_group: "datasets pre HERA"}
-- {dataset: NMC, custom_group: "datasets pre HERA"}
-- {dataset: SLACP_dwsh, custom_group: "datasets pre HERA"}
-- {dataset: SLACD_dw, custom_group: "datasets pre HERA"}
-- {dataset: BCDMSP_dwsh, custom_group: "datasets pre HERA"}
-- {dataset: BCDMSD_dw, custom_group: "datasets pre HERA"}
-- {dataset: CHORUSNUPb_dw, custom_group: "datasets pre HERA"}
-- {dataset: CHORUSNBPb_dw, custom_group: "datasets pre HERA"}
-- {dataset: NTVNUDMNFe_dw, custom_group: "datasets pre HERA", cfac: [MAS]}
-- {dataset: NTVNBDMNFe_dw, custom_group: "datasets pre HERA", cfac: [MAS]}
-- {dataset: HERACOMBNCEM, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP460, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP575, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP820, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP920, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBCCEM, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBCCEP, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMB_SIGMARED_C, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMB_SIGMARED_B, custom_group: "datasets pre LHC"}
-- {dataset: DYE886R_dw, custom_group: "datasets pre HERA"}
-- {dataset: DYE886P, custom_group: "datasets pre HERA", cfac: [QCD]}
-- {dataset: DYE605_dw, custom_group: "datasets pre HERA", cfac: [QCD]}
-- {dataset: CDFZRAP_NEW, custom_group: "datasets pre LHC", cfac: [QCD]}
-- {dataset: D0ZRAP, custom_group: "datasets pre LHC", cfac: [QCD]}
-- {dataset: D0WMASY, custom_group: "datasets pre LHC", cfac: [QCD]}
-- {dataset: ATLASWZRAP36PB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASZHIGHMASS49FB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASLOMASSDY11EXT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASWZRAP11CC, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASWZRAP11CF, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASDY2D8TEV, custom_group: "NNPDF40 datasets", cfac: [QCDEWK]}
-- {dataset: ATLAS_WZ_TOT_13TEV, custom_group: "NNPDF40 datasets", cfac: [NRM, QCD]}
-- {dataset: ATLAS_WP_JET_8TEV_PT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_WM_JET_8TEV_PT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASZPT8TEVMDIST, custom_group: "NNPDF40 datasets", cfac: [QCD], sys: 10}
-- {dataset: ATLASZPT8TEVYDIST, custom_group: "NNPDF40 datasets", cfac: [QCD], sys: 10}
-- {dataset: ATLASTTBARTOT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_1JET_8TEV_R06_DEC, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_2JET_7TEV_R06, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASPHT15, custom_group: "NNPDF40 datasets", cfac: [QCD, EWK]}
-- {dataset: ATLAS_SINGLETOP_TCH_R_7TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_R_13TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSWEASY840PB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSWMASY47FB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSDY2D11, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSWMU8TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSZDIFF12, custom_group: "NNPDF40 datasets", cfac: [QCD, NRM], sys: 10}
-- {dataset: CMS_2JET_7TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_2JET_3D_8TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSTTBARTOT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSTOPDIFF8TEVTTRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSTTBARTOT5TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_TTB_DIFF_13TEV_2016_2L_TRAP, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_TTB_DIFF_13TEV_2016_LJ_TRAP, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_SINGLETOP_TCH_TOT_7TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_SINGLETOP_TCH_R_8TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_SINGLETOP_TCH_R_13TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCBZ940PB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCBZEE2FB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCBWZMU7TEV, custom_group: "NNPDF40 datasets", cfac: [NRM, QCD]}
-- {dataset: LHCBWZMU8TEV, custom_group: "NNPDF40 datasets", cfac: [NRM, QCD]}
-- {dataset: LHCB_Z_13TEV_DIMUON, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCB_Z_13TEV_DIELECTRON, custom_group: "NNPDF40 datasets", cfac: [QCD]}
+- {dataset: NMC_NC_NOTFIXED_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: SLAC_NC_NOTFIXED_P_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: SLAC_NC_NOTFIXED_D_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: BCDMS_NC_NOTFIXED_P_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: BCDMS_NC_NOTFIXED_D_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: CHORUS_CC_NOTFIXED_PB_DW_NU-SIGMARED, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: CHORUS_CC_NOTFIXED_PB_DW_NB-SIGMARED, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: NUTEV_CC_NOTFIXED_FE_DW_NU-SIGMARED, custom_group: datasets pre HERA, cfac: [MAS], variant: legacy}
+- {dataset: NUTEV_CC_NOTFIXED_FE_DW_NB-SIGMARED, custom_group: datasets pre HERA, cfac: [MAS], variant: legacy}
+- {dataset: HERA_NC_318GEV_EM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_225GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_251GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_300GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_318GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_CC_318GEV_EM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_CC_318GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_318GEV_EAVG_CHARM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: DYE866_Z0_800GEV_PXSEC, custom_group: datasets pre HERA, cfac: [QCD], variant: legacy}
+- {dataset: DYE605_Z0_38P8GEV_DW_PXSEC, custom_group: datasets pre HERA, cfac: [QCD], variant: legacy}
+- {dataset: CDF_Z0_1P96TEV_ZRAP, custom_group: datasets pre LHC, cfac: [QCD], variant: legacy}
+- {dataset: D0_Z0_1P96TEV_ZRAP, custom_group: datasets pre LHC, cfac: [QCD], variant: legacy}
+- {dataset: D0_WPWM_1P96TEV_ASY, custom_group: datasets pre LHC, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_DY_7TEV_36PB_ETA, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_7TEV_49FB_HIMASS, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_7TEV_LOMASS_M, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_DY_7TEV_46FB_CC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_7TEV_46FB_CF-Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_8TEV_HIMASS_M-Y, custom_group: NNPDF40 datasets, cfac: [QCDEWK], variant: legacy}
+- {dataset: ATLAS_DY_13TEV_TOT, custom_group: NNPDF40 datasets, cfac: [NRM, QCD], variant: legacy}
+- {dataset: ATLAS_WJ_8TEV_WP-PT, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_WJ_8TEV_WM-PT, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0J_8TEV_PT-M, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy_10}
+- {dataset: ATLAS_Z0J_8TEV_PT-Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy_10}
+- {dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_13TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YT-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_2L_DIF_YTTBAR-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_1JET_8TEV_R06_PTY, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy_decorrelated}
+- {dataset: ATLAS_2JET_7TEV_R06_M12Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_PH_13TEV_XSEC, custom_group: "NNPDF40 datasets", cfac: [QCD, EWK], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_7TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_13TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_7TEV_T-Y-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_7TEV_TBAR-Y-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_8TEV_T-RAP-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_8TEV_TBAR-RAP-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_WPWM_7TEV_ELECTRON_ASY, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: CMS_WPWM_7TEV_MUON_ASY, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_Z0_7TEV_DIMUON_2D, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: CMS_WPWM_8TEV_MUON_Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_Z0J_8TEV_PT-Y, custom_group: NNPDF40 datasets, cfac: [QCD, NRM], variant: legacy_10}
+- {dataset: CMS_2JET_7TEV_M12Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: CMS_TTBAR_7TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_8TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_13TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_5TEV_TOT_X-SEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_8TEV_2L_DIF_MTTBAR-YT-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_13TEV_2L_DIF_YT, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_13TEV_LJ_2016_DIF_YTTBAR, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_SINGLETOP_7TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_SINGLETOP_8TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_SINGLETOP_13TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: LHCB_Z0_7TEV_DIELECTRON_Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: LHCB_Z0_8TEV_DIELECTRON_Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: LHCB_DY_7TEV_MUON_Y, custom_group: NNPDF40 datasets, cfac: [NRM, QCD]}
+- {dataset: LHCB_DY_8TEV_MUON_Y, custom_group: NNPDF40 datasets, cfac: [NRM, QCD]}
+- {dataset: LHCB_Z0_13TEV_DIMUON-Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: LHCB_Z0_13TEV_DIELECTRON-Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
 
 groups:
  - metadata_group: custom_group
diff --git a/validphys2/examples/generate_a_report.yaml b/validphys2/examples/generate_a_report.yaml
index 3fbd20af2b..d656531ff8 100644
--- a/validphys2/examples/generate_a_report.yaml
+++ b/validphys2/examples/generate_a_report.yaml
@@ -3,22 +3,22 @@ meta:
   author: Lazy Person
   keywords: [chi2, replica, distribution, DISonly]
 
-fit: NNPDF31_nnlo_as_0118_DISonly
+fit: NNPDF40_nnlo_low_precision_240916
 
 pdf:
   from_: "fit"
 
-experiments:
+dataset_inputs:
   from_: "fit"
 
-theoryid: 162
+theoryid: 399
 
 use_cuts: "fromfit"
 
 template_text: |
   # Histograms of χ2
   ## DIS only distributions
-  {@experiments::experiment plot_chi2dist@}
+  {@ plot_chi2dist_experiments @}
 
 actions_:
   - report(main=True)
diff --git a/validphys2/examples/spiderplot_dataspecs.yaml b/validphys2/examples/spiderplot_dataspecs.yaml
index ef0e83530b..ec2b7ac134 100644
--- a/validphys2/examples/spiderplot_dataspecs.yaml
+++ b/validphys2/examples/spiderplot_dataspecs.yaml
@@ -3,7 +3,7 @@ meta:
   author: Lazy Person
   keywords: [example, spider]
 
-fit: NNPDF40_nnlo_lowprecision
+fit: NNPDF40_nnlo_low_precision_240916
 
 theory:
   from_: fit
diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py
index 3fb3d3f22d..60440869e7 100644
--- a/validphys2/src/validphys/commondataparser.py
+++ b/validphys2/src/validphys/commondataparser.py
@@ -923,7 +923,6 @@ def load_commondata_new(metadata):
         nsys=nsys,
         commondata_table=commondata_table,
         systype_table=systype_table,
-        legacy=False,
         legacy_name=legacy_name,
         kin_variables=metadata.kinematic_coverage,
     )
@@ -949,6 +948,7 @@ def load_commondata(spec):
 
 
 ### Old commondata:
+### All code below this line is deprecated and will be removed
 def load_commondata_old(commondatafile, systypefile, setname):
     """Parse a commondata file and a systype file into a CommonData.
diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py
index aba4f7d930..20c6ba6fe0 100644
--- a/validphys2/src/validphys/coredata.py
+++ b/validphys2/src/validphys/coredata.py
@@ -296,7 +296,7 @@ class CommonData:
     nsys: int
     commondata_table: pd.DataFrame = dataclasses.field(repr=False)
     systype_table: pd.DataFrame = dataclasses.field(repr=False)
-    legacy: bool
+    legacy: bool = False
     systematics_table: Optional[pd.DataFrame] = dataclasses.field(init=None, repr=False)
     legacy_name: Optional[str] = None
     kin_variables: Optional[list] = None
diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py
index 0558edf22d..de52e5cca8 100644
--- a/validphys2/src/validphys/loader.py
+++ b/validphys2/src/validphys/loader.py
@@ -204,6 +204,12 @@ def _use_fit_commondata_old_format_to_new_format(setname, file_path):
     if not file_path.exists():
         raise DataNotFoundError(f"Data for {setname} at {file_path} not found")
 
+    # This function (as well as the loader) is only kept during this first tag to ensure that cuts fromfit
+    # can be used even with old fits... for now
+    log.error(
+        "Note, the function `_use_fit_commondata_old_format_to_new_format` is deprecated and will be removed in future releases"
+    )
+
     # Try loading the data from file_path, using the systypes from there
     # although they are not used
     systypes = next(file_path.parent.glob("systypes/*.dat"))
@@ -312,26 +318,12 @@ def available_ekos(self):
             eko_path.parent.name.split("_")[1] for eko_path in self._theories_path.glob("*/eko.tar")
         }
 
-    @property
-    @functools.lru_cache
-    def _available_old_datasets(self):
-        """Provide all available datasets
-        At the moment this means cominbing the new and olf format datasets
-        """
-        data_str = "DATA_"
-        old_commondata_folder = self.commondata_folder.with_name("commondata")
-        # We filter out the positivity and integrability sets here
-        return {
-            file.stem[len(data_str) :]
-            for file in old_commondata_folder.glob(f'{data_str}*.dat')
-            if not file.stem.startswith((f"{data_str}POS", f"{data_str}INTEG"))
-        }
-
     @property
     @functools.lru_cache
     def available_datasets(self):
         """Provide all available datasets that were available before the new
         commondata was implemented and that have a translation.
+        Returns old names
 
         TODO: This should be substituted by a subset of `implemented_dataset`
         that returns only complete datasets.
@@ -362,7 +354,7 @@ def available_pdfs(self):
 
     @property
     def commondata_folder(self):
-        return self.datapath / 'new_commondata'
+        return self.datapath / 'commondata'
 
     def _use_fit_commondata_old_format_to_old_format(self, basedata, fit):
         """Load pseudodata from a fit where the data was generated in the old format
@@ -386,13 +378,7 @@ def _use_fit_commondata_old_format_to_old_format(self, basedata, fit):
         return data_path
 
     def check_commondata(
-        self,
-        setname,
-        sysnum=None,
-        use_fitcommondata=False,
-        fit=None,
-        variant=None,
-        force_old_format=False,
+        self, setname, sysnum=None, use_fitcommondata=False, fit=None, variant=None
     ):
         """Prepare the commondata files to be loaded.
         A commondata is defined by its name (``setname``) and the variant (``variant``)
         ...
         Any actions trying to requests an old-format commondata from this
         function will log an error message. This error message
         will eventually become an actual error.
""" - datafile = None - metadata_path = None - old_commondata_folder = self.commondata_folder.with_name("commondata") - if use_fitcommondata: if not fit: raise LoadFailedError("Must specify a fit when setting use_fitcommondata") @@ -419,9 +401,7 @@ def check_commondata( # 2. Whether the data was in the old format when it was generated # First, load the base commondata which will be used as container and to check point 1 - basedata = self.check_commondata( - setname, variant=variant, force_old_format=force_old_format, sysnum=sysnum - ) + basedata = self.check_commondata(setname, variant=variant, sysnum=sysnum) # and the possible filename for the new data data_path, unc_path = generate_path_filtered_data(fit.path, setname) @@ -441,81 +421,18 @@ def check_commondata( # Get data folder and observable name and check for existence try: - if not force_old_format: - setfolder, observable_name = setname.rsplit("_", 1) - metadata_path = self.commondata_folder / setfolder / "metadata.yaml" - force_old_format = not metadata_path.exists() + setfolder, observable_name = setname.rsplit("_", 1) except ValueError: - log.warning(f"Error trying to read {setname}, falling back to the old format reader") - force_old_format = True - - if not force_old_format: - # Get the instance of ObservableMetaData - try: - metadata = parse_new_metadata(metadata_path, observable_name, variant=variant) - return CommonDataSpec(setname, metadata) - except ValueError as e: - # Before failure, check whetehr this might be an old dataset - datafile = old_commondata_folder / f"DATA_{setname}.dat" - if not datafile.exists(): - raise e - - force_old_format = True - metadata_path = None - - # Eventually the error log will be replaced by the commented execption - log.error( - f"Trying to read {setname} in the old format. Note that this is deprecated and will be removed in future releases" - ) - - # Everything below is deprecated and will be removed in future releases - if datafile is None: - datafile = old_commondata_folder / f"DATA_{setname}.dat" - - if not datafile.exists(): raise DataNotFoundError( - f"No .dat file found for {setname} and no new data translation found" - ) - - if sysnum is None: - sysnum = 'DEFAULT' - sysfile = old_commondata_folder / "systypes" / f"SYSTYPE_{setname}_{sysnum}.dat" - - if not sysfile.exists(): - raise SysNotFoundError( - "Could not find systype %s for dataset '%s'. File %s does not exist." - % (sysnum, setname, sysfile) + f"Dataset {setname} not found. Is the name correct? Old commondata is no longer accepted" ) + set_path = self.commondata_folder / setfolder + if not set_path.exists(): + raise DataNotFoundError(f"Dataset {setname} not found") - plotfiles = [] - - metadata = peek_commondata_metadata(datafile) - process_plotting_root = old_commondata_folder / f'PLOTTINGTYPE_{metadata.process_type}' - type_plotting = ( - process_plotting_root.with_suffix('.yml'), - process_plotting_root.with_suffix('.yaml'), - ) - - data_plotting_root = old_commondata_folder / f'PLOTTING_{setname}' - - data_plotting = ( - data_plotting_root.with_suffix('.yml'), - data_plotting_root.with_suffix('.yaml'), - ) - # TODO: What do we do when both .yml and .yaml exist? - for tp in (type_plotting, data_plotting): - for p in tp: - if p.exists(): - plotfiles.append(p) - if setname != metadata.name: - raise InconsistentMetaDataError( - f"The name found in the CommonData file, {metadata.name}, did " - f"not match the dataset name, {setname}." 
-            )
-        return CommonDataSpec(
-            setname, metadata, legacy=True, datafile=datafile, sysfile=sysfile, plotfiles=plotfiles
-        )
+        metadata_path = set_path / "metadata.yaml"
+        metadata = parse_new_metadata(metadata_path, observable_name, variant=variant)
+        return CommonDataSpec(setname, metadata)
 
     @functools.lru_cache
     def check_theoryID(self, theoryID):
@@ -750,8 +667,6 @@ def check_dataset(
         if not isinstance(theoryid, TheoryIDSpec):
             theoryid = self.check_theoryID(theoryid)
 
-        theoryno, _ = theoryid
-
         # TODO:
         # The dataset is checked twice, once here
         # and once by config in produce_commondata
@@ -761,21 +676,7 @@ def check_dataset(
             name, sysnum, use_fitcommondata=use_fitcommondata, fit=fit, variant=variant
         )
 
-        if commondata.legacy:
-            if theoryid.is_pineappl():
-                raise LoaderError(
-                    f"Trying to use a new theory with an old commondata format, surely it must be a mistake: {name}"
-                )
-
-            # Old-format commondata that we haven't been able to translate
-            # allows only for the usage of only old-format theories
-            try:
-                fkspec, op = self.check_compound(theoryno, name, cfac)
-            except CompoundNotFound:
-                fkspec = self.check_fktable(theoryno, name, cfac)
-                op = None
-        else:
-            fkspec, op = self._check_theory_old_or_new(theoryid, commondata, cfac)
+        fkspec, op = self._check_theory_old_or_new(theoryid, commondata, cfac)
 
         # Note this is simply for convenience when scripting. The config will
         # construct the actual Cuts object by itself
diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py
index fd6b0129d1..bad704d781 100644
--- a/validphys2/src/validphys/tests/conftest.py
+++ b/validphys2/src/validphys/tests/conftest.py
@@ -72,11 +72,11 @@ def tmp(tmpdir):
 THEORYID = 162
 THEORYID_NEW = 399
 THEORY_QED = 398
-FIT = "NNPDF40_nnlo_lowprecision"
+FIT = "NNPDF40_nnlo_low_precision_240916"
 FIT_3REPLICAS = "Basic_runcard_3replicas_lowprec_221130"
 FIT_3REPLICAS_DCUTS = "Basic_runcard_3replicas_diffcuts_230221"
-FIT_ITERATED = "NNPDF40_nnlo_low_precision_iterated"
-PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_221130"
+FIT_ITERATED = "NNPDF40_nnlo_low_precision_240916_iterated"
+PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_240916"
 
 base_config = dict(pdf=PDF, use_cuts='nocuts', dataset_inputs=DATA, theoryid=THEORYID_NEW, Q=10)
diff --git a/validphys2/src/validphys/tests/test_loader.py b/validphys2/src/validphys/tests/test_loader.py
index b106d93e98..dead6222d4 100644
--- a/validphys2/src/validphys/tests/test_loader.py
+++ b/validphys2/src/validphys/tests/test_loader.py
@@ -3,6 +3,7 @@
 Test loading utilities.
""" + import os from pathlib import Path import subprocess as sp @@ -12,6 +13,7 @@ from hypothesis.strategies import composite, sampled_from, sets import pytest +from nnpdf_data import legacy_to_new_map from validphys.loader import NNPDF_DIR, FallbackLoader, FitNotFound from validphys.plotoptions.core import get_info, kitable from validphys.tests.conftest import FIT, FIT_3REPLICAS, THEORYID_NEW @@ -32,7 +34,8 @@ def load(self): @composite def commondata_and_cuts(draw): old_name = draw(sampled_from(dss)) - cd = l.check_commondata(old_name, force_old_format=True) + new_name, variant = legacy_to_new_map(old_name) + cd = l.check_commondata(new_name, variant=variant) ndata = cd.metadata.ndata # Get a cut mask with at least one selected datapoint masks = sets(sampled_from(range(ndata)), min_size=1) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 7b4efeb50d..2b0599853d 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -75,7 +75,7 @@ def test_no_savepseudodata(): def test_read_matches_recreate(): reads = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) - recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT, separate_multiplicative=True) + recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) for read, recreate in zip(reads, recreates): # We ignore the absolute ordering of the dataframes and just check # that they contain identical elements. diff --git a/validphys2/src/validphys/tests/test_pyfkdata.py b/validphys2/src/validphys/tests/test_pyfkdata.py index 47ab86ef3b..75d5b6727f 100644 --- a/validphys2/src/validphys/tests/test_pyfkdata.py +++ b/validphys2/src/validphys/tests/test_pyfkdata.py @@ -8,41 +8,40 @@ from validphys.fkparser import load_fktable from validphys.loader import FallbackLoader as Loader from validphys.results import PositivityResult, ThPredictionsResult -from validphys.tests.conftest import HESSIAN_PDF, PDF, POSITIVITIES, THEORYID, THEORYID_NEW +from validphys.tests.conftest import DATA, HESSIAN_PDF, PDF, POSITIVITIES, THEORYID, THEORYID_NEW + +DS1 = DATA[2]["dataset"] # hadronic +DS2 = DATA[0]["dataset"] # dis def test_basic_loading(): + """Test the loading of an old theory using directly the legacy name""" l = Loader() # Test both with and without cfactors, and load both DIS and hadronic for cfac in ((), ("QCD",)): - fk = l.check_fktable(setname="ATLASTTBARTOT", theoryID=THEORYID, cfac=cfac) - res = load_fktable(fk) - assert res.ndata == 3 + ds = l.check_dataset(DS1, theoryid=THEORYID_NEW, cfac=cfac) + res = load_fktable(ds.fkspecs[0]) + assert res.ndata == 50 assert isinstance(res.sigma, pd.DataFrame) - fk = l.check_fktable(setname="H1HERAF2B", theoryID=THEORYID, cfac=()) - res = load_fktable(fk) - assert res.ndata == 12 + ds = l.check_dataset(DS2, theoryid=THEORYID_NEW) + res = load_fktable(ds.fkspecs[0]) + assert res.ndata == 292 assert isinstance(res.sigma, pd.DataFrame) - # Check if cfactors for datasets having one entry are correctly parsed - fk = l.check_fktable(setname="CMSTTBARTOT7TEV", theoryID=THEORYID, cfac=("QCD",)) - res = load_fktable(fk) - assert res.ndata == 1 - def test_cuts(): l = Loader() - ds = l.check_dataset("ATLASTTBARTOT", theoryid=THEORYID, cfac=("QCD",)) + ds = l.check_dataset(DS1, theoryid=THEORYID_NEW, cfac=("QCD",), variant="legacy") table = load_fktable(ds.fkspecs[0]) # Check explicit cuts newtable = table.with_cuts([0, 1]) assert set(newtable.sigma.index.get_level_values(0)) == {0, 1} assert 
newtable.ndata == 2 - assert newtable.metadata["GridInfo"].ndata == ds.commondata.ndata + assert table.ndata == ds.commondata.ndata # Check empty cuts assert newtable.with_cuts(None) is newtable # Check loaded cuts - ds = l.check_dataset("H1HERAF2B", theoryid=THEORYID) + ds = l.check_dataset(DS2, theoryid=THEORYID_NEW, variant="legacy") table = load_fktable(ds.fkspecs[0]) newtable = table.with_cuts(ds.cuts) assert len(newtable.sigma.index.get_level_values(0).unique()) == len(ds.cuts.load()) @@ -55,17 +54,20 @@ def test_predictions(pdf_name): l = Loader() pdf = l.check_pdf(pdf_name) datasets = [ - {"name": "ATLASTTBARTOT", "cfac": ("QCD",)}, # cfactors - {"name": "H1HERAF2B"}, # DIS, op: NULL - {"name": "D0ZRAP"}, # op: RATIO - {"name": "D0WEASY"}, # op: ASY - {"name": "CMSWCHARMTOT"}, # op: ADD - {"name": "ATLASWPT31PB"}, # op: SMN - {"name": "DYE906R"}, # op: COM <---- - {"name": "DYE906_D"}, # op: SMT <---- + {"name": DS1, "cfac": ("QCD",)}, # cfactors + {"name": DS2}, # DIS, op: NULL + {"name": "D0_Z0_1P96TEV_ZRAP"}, # op: RATIO + {"name": "D0_WPWM_1P96TEV_ASY"}, # op: ASY + {"name": "CMS_SINGLETOP_7TEV_TCHANNEL-XSEC"}, # op: ADD + # Not included in the light theoryid + # {"name": "DYE906_Z0_120GEV_DW_PDXSECRATIO"}, # op: COM + # Not used in any dataset: + # {"name": "DYE906_D"}, # op: SMT + # {"name": "ATLASWPT31PB"}, # op: SMN ] for daset in datasets: - ds = l.check_dataset(**daset, theoryid=THEORYID) + daset["variant"] = "legacy" + ds = l.check_dataset(**daset, theoryid=THEORYID_NEW) preds = predictions(ds, pdf) core_predictions = ThPredictionsResult.from_convolution(pdf, ds) # Uses rawdata since we want to check all members for which we computed the convolution @@ -94,13 +96,13 @@ def test_positivity(pdf_name): pdf = l.check_pdf(pdf_name) for posset in POSITIVITIES: # Use the loader to load the positivity dataset - ps = l.check_posset(setname=posset, theoryID=THEORYID, postlambda=1e6, rules=()) + ps = l.check_posset(setname=posset, theoryID=THEORYID_NEW, postlambda=1e6, rules=()) preds = predictions(ps, pdf) core_predictions = PositivityResult.from_convolution(pdf, ps) assert_allclose(preds.values, core_predictions.rawdata) # Now do the same with the API api_predictions = API.positivity_predictions_data_result( - theoryid=THEORYID, + theoryid=THEORYID_NEW, use_cuts="internal", pdf=pdf_name, posdataset={"dataset": posset, "maxlambda": 1e6}, @@ -112,11 +114,11 @@ def test_positivity(pdf_name): def test_extended_predictions(): - """Test the python predictions dataframe stasts with MC sets""" + """Test the python predictions dataframe stats with MC sets""" l = Loader() pdf = l.check_pdf(PDF) - had = l.check_dataset("ATLASTTBARTOT", theoryid=THEORYID, cfac=("QCD",)) - dis = l.check_dataset("H1HERAF2B", theoryid=THEORYID) + had = l.check_dataset(DS1, theoryid=THEORYID, cfac=("QCD",), variant="legacy") + dis = l.check_dataset(DS2, theoryid=THEORYID, variant="legacy") dis_all = predictions(dis, pdf).T dis_central = central_predictions(dis, pdf).T assert np.allclose(dis_all.mean().values, dis_central.values) @@ -128,34 +130,3 @@ def test_extended_predictions(): assert np.allclose(had_linear.mean().values, had_central) assert not np.allclose(had_all.mean().values, had_central) assert np.all((had_linear - had_all).std() < had_all.std()) - - -@pytest.mark.parametrize("dataset", ["CMSWMASY47FB", "ATLASWZRAP11CC", "LHCBWZMU7TEV"]) -def test_compare_cf(data_internal_cuts_config, data_internal_cuts_new_theory_config, dataset): - """Loads datasets from the two low-precision theories (one old, one 
new) - and checks that the result is the same despite being read differently""" - config = dict(data_internal_cuts_config) - config_new = dict(data_internal_cuts_new_theory_config) - - pdf = API.pdf(**config) - - dinput = {"dataset": dataset} - config["dataset_input"] = dinput - config_new["dataset_input"] = dinput - - ds_old = API.dataset(**config) - ds_new = API.dataset(**config_new) - res_old = central_predictions(ds_old, pdf) - res_new = central_predictions(ds_new, pdf) - - dinput["cfac"] = ["QCD"] - ds_old_cfac = API.dataset(**config) - ds_new_cfac = API.dataset(**config_new) - - res_old_cfac = central_predictions(ds_old_cfac, pdf) - res_new_cfac = central_predictions(ds_new_cfac, pdf) - - old_cfac = res_old_cfac / res_old - new_cfac = res_new_cfac / res_new - - np.testing.assert_allclose(new_cfac, old_cfac, rtol=1e-4)
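
For porting other runcards and scripts away from the old commondata names, the same two-step path used by the updated `commondata_and_cuts` fixture above applies. The following is a minimal, illustrative sketch (not part of the patch): the `ATLASZPT8TEVMDIST` to `ATLAS_Z0J_8TEV_PT-M` translation with variant `legacy_10` (the old `sys: 10` key folded into the variant) is taken from the future_test_example.yaml hunk above, and any other legacy name listed in `dataset_names.yml` should translate the same way.

    from nnpdf_data import legacy_to_new_map
    from validphys.loader import FallbackLoader

    # Translate a legacy dataset name into the new-format name and variant;
    # an old ``sys`` key, when present, becomes part of the variant
    new_name, variant = legacy_to_new_map("ATLASZPT8TEVMDIST", sys=10)
    print(new_name, variant)  # expected: ATLAS_Z0J_8TEV_PT-M legacy_10

    # After this patch the loader only accepts new-format names; the
    # variant selects the legacy treatment of the systematics
    loader = FallbackLoader()
    cd = loader.check_commondata(new_name, variant=variant)
    print(cd.metadata.ndata)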