From 2bbff1aa829e410739b320426ab27135d374019e Mon Sep 17 00:00:00 2001
From: juacrumar
Date: Tue, 17 Sep 2024 11:00:37 +0200
Subject: [PATCH] minimal modifications so that tests and examples run without
 old commondata, remove the option to force reading an old commondata

---
 doc/sphinx/source/data/data-config.rst        |   4 +-
 nnpdf_data/nnpdf_data/__init__.py             |  24 ++-
 nnpdf_data/pyproject.toml                     |   4 +-
 pyproject.toml                                |   3 +
 .../examples/data_theory_comparison.yaml      |   6 +-
 validphys2/examples/dataspecs.yaml            |   6 +-
 validphys2/examples/future_test_example.yaml  | 149 +++++++++---------
 validphys2/examples/generate_a_report.yaml    |   8 +-
 validphys2/examples/spiderplot_dataspecs.yaml |   2 +-
 validphys2/src/validphys/commondataparser.py  |   2 +-
 validphys2/src/validphys/coredata.py          |   2 +-
 validphys2/src/validphys/loader.py            | 137 +++-------------
 validphys2/src/validphys/tests/conftest.py    |   6 +-
 validphys2/src/validphys/tests/test_loader.py |   5 +-
 .../src/validphys/tests/test_pseudodata.py    |   2 +-
 .../src/validphys/tests/test_pyfkdata.py      |  91 ++++------
 16 files changed, 164 insertions(+), 287 deletions(-)

diff --git a/doc/sphinx/source/data/data-config.rst b/doc/sphinx/source/data/data-config.rst
index ae44d3a6fa..8d249a56bc 100644
--- a/doc/sphinx/source/data/data-config.rst
+++ b/doc/sphinx/source/data/data-config.rst
@@ -19,7 +19,7 @@ Experimental data storage
 The central repository for ``CommonData`` in use by ``nnpdf`` projects is
 located in the ``nnpdf`` git repository at
 
-    ``nnpdf_data/nnpdf_data/new_commondata``
+    ``nnpdf_data/nnpdf_data/commondata``
 
 where a separate ``CommonData`` file is stored for each *Dataset* with the
 filename format described in :ref:`dataset-naming-convention`.
@@ -45,7 +45,7 @@ The following lines will check whether a newly added theory can be read by valid
 (change 700 by the id of your newly added theory).
 
 .. code-block:: python
-
+
     from nnpdf_data import theory_cards
     from nnpdf_data.theorydbutils import fetch_theory
     theory = fetch_theory(theory_cards, 700)
diff --git a/nnpdf_data/nnpdf_data/__init__.py b/nnpdf_data/nnpdf_data/__init__.py
index 70e91afdad..b7f2a72295 100644
--- a/nnpdf_data/nnpdf_data/__init__.py
+++ b/nnpdf_data/nnpdf_data/__init__.py
@@ -6,13 +6,18 @@ from ._version import __version__
 
 path_vpdata = pathlib.Path(__file__).parent
-path_commondata = path_vpdata / "new_commondata"
+path_commondata = path_vpdata / "commondata"
 
 # VP should not have access to this file, only to the products
 _path_legacy_mapping = path_commondata / "dataset_names.yml"
-legacy_to_new_mapping = yaml.YAML().load(_path_legacy_mapping)
 theory_cards = path_vpdata / "theory_cards"
 
+_legacy_to_new_mapping_raw = yaml.YAML().load(_path_legacy_mapping)
+# Convert strings into a dictionary
+legacy_to_new_mapping = {
+    k: ({"dataset": v} if isinstance(v, str) else v) for k, v in _legacy_to_new_mapping_raw.items()
+}
+
 
 @lru_cache
 def legacy_to_new_map(dataset_name, sys=None):
@@ -22,13 +27,6 @@ def legacy_to_new_map(dataset_name, sys=None):
         return dataset_name, None
 
     new_name = legacy_to_new_mapping[dataset_name]
-    if isinstance(new_name, str):
-        if sys is not None:
-            raise KeyError(
-                f"I cannot translate the combination of {dataset_name} and sys: {sys}. Please report this."
-            )
-        return new_name, None
-
     variant = new_name.get("variant")
     new_name = new_name["dataset"]
     if sys is not None:
@@ -48,11 +46,9 @@ def new_to_legacy_map(dataset_name, variant_used):
 
     # we can have 2 old dataset mapped to the same new one
     possible_match = None
-    for old_name, new_name in legacy_to_new_mapping.items():
-        variant = None
-        if not isinstance(new_name, str):
-            variant = new_name.get("variant")
-            new_name = new_name["dataset"]
+    for old_name, new_info in legacy_to_new_mapping.items():
+        new_name = new_info["dataset"]
+        variant = new_info.get("variant")
 
         if new_name == dataset_name:
             if variant_used == variant:
diff --git a/nnpdf_data/pyproject.toml b/nnpdf_data/pyproject.toml
index ab828cf652..70d9749b24 100644
--- a/nnpdf_data/pyproject.toml
+++ b/nnpdf_data/pyproject.toml
@@ -22,8 +22,8 @@ repository = "https://github.com/NNPDF/nnpdf_data"
 
 # Exclude intermediate data files
 exclude = [
-  "nnpdf_data/new_commondata/*/rawdata",
-  "nnpdf_data/new_commondata/*/*.py",
+  "nnpdf_data/commondata/*/rawdata",
+  "nnpdf_data/commondata/*/*.py",
 ]
 # Data files
 include = [
diff --git a/pyproject.toml b/pyproject.toml
index cb3809b892..104b979d69 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -140,3 +140,6 @@ profile = "black" # https://black.readthedocs.io/en/stable/guides/using_black_wi
 skip_gitignore = true
 known_first_party = ["validphys", "eko", "n3fit", "nnpdf_data", "reportengine"]
 force_sort_within_sections = true
+
+[tool.pytest.ini_options]
+addopts = "--disable-warnings"
diff --git a/validphys2/examples/data_theory_comparison.yaml b/validphys2/examples/data_theory_comparison.yaml
index d726aa7f9b..f390b6d377 100644
--- a/validphys2/examples/data_theory_comparison.yaml
+++ b/validphys2/examples/data_theory_comparison.yaml
@@ -4,10 +4,10 @@ meta:
   author: Lazy Person
 
 pdfs:
-  - id: NNPDF40_nnlo_lowprecision
-    label: NNPDF40_nnlo_lowprecision
+  - id: NNPDF40_nnlo_low_precision
+    label: NNPDF40_nnlo_low_precision
 
-theoryid: 162
+theoryid: 399
 
 use_cuts: "internal"
diff --git a/validphys2/examples/dataspecs.yaml b/validphys2/examples/dataspecs.yaml
index 54d9c2c881..a45d5ab93e 100644
--- a/validphys2/examples/dataspecs.yaml
+++ b/validphys2/examples/dataspecs.yaml
@@ -26,10 +26,10 @@ dataspecs:
     speclabel: "NLO"
     fit: NNPDF40_nlo_as_01180
 
-  - theoryid: 162
-    pdf: NNPDF40_nnlo_lowprecision
+  - theoryid: 399
+    pdf: NNPDF40_nnlo_low_precision_240916
     speclabel: "NNLO"
-    fit: NNPDF40_nnlo_lowprecision
+    fit: NNPDF40_nnlo_low_precision_240916
 
 
 template_text: |
diff --git a/validphys2/examples/future_test_example.yaml b/validphys2/examples/future_test_example.yaml
index abd79e5cd9..108360c6f3 100644
--- a/validphys2/examples/future_test_example.yaml
+++ b/validphys2/examples/future_test_example.yaml
@@ -12,8 +12,8 @@ my_description:
   from_: meta
 
 future:
-  pdf: {id: 210219-01-rs-nnpdf40-baseline, label: "NNPDF4.0"}
-  fit: {id: 210219-01-rs-nnpdf40-baseline, label: "NNPDF4.0"}
+  pdf: {id: 240516-jcm-nnpdf40-like, label: "NNPDF4.0"}
+  fit: {id: 240516-jcm-nnpdf40-like, label: "NNPDF4.0"}
   speclabel: "NNPDF4.0 fit"
 
 description:
@@ -41,77 +41,80 @@ past:
     from_: fit
 
 dataset_inputs:
-- {dataset: NMCPD_dw, custom_group: "datasets pre HERA"}
-- {dataset: NMC, custom_group: "datasets pre HERA"}
-- {dataset: SLACP_dwsh, custom_group: "datasets pre HERA"}
-- {dataset: SLACD_dw, custom_group: "datasets pre HERA"}
-- {dataset: BCDMSP_dwsh, custom_group: "datasets pre HERA"}
-- {dataset: BCDMSD_dw, custom_group: "datasets pre HERA"}
-- {dataset: CHORUSNUPb_dw, custom_group: "datasets pre HERA"}
-- {dataset: CHORUSNBPb_dw, custom_group: "datasets pre HERA"}
-- {dataset: NTVNUDMNFe_dw, custom_group: "datasets pre HERA", cfac: [MAS]}
-- {dataset: NTVNBDMNFe_dw, custom_group: "datasets pre HERA", cfac: [MAS]}
-- {dataset: HERACOMBNCEM, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP460, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP575, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP820, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBNCEP920, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBCCEM, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMBCCEP, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMB_SIGMARED_C, custom_group: "datasets pre LHC"}
-- {dataset: HERACOMB_SIGMARED_B, custom_group: "datasets pre LHC"}
-- {dataset: DYE886R_dw, custom_group: "datasets pre HERA"}
-- {dataset: DYE886P, custom_group: "datasets pre HERA", cfac: [QCD]}
-- {dataset: DYE605_dw, custom_group: "datasets pre HERA", cfac: [QCD]}
-- {dataset: CDFZRAP_NEW, custom_group: "datasets pre LHC", cfac: [QCD]}
-- {dataset: D0ZRAP, custom_group: "datasets pre LHC", cfac: [QCD]}
-- {dataset: D0WMASY, custom_group: "datasets pre LHC", cfac: [QCD]}
-- {dataset: ATLASWZRAP36PB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASZHIGHMASS49FB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASLOMASSDY11EXT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASWZRAP11CC, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASWZRAP11CF, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASDY2D8TEV, custom_group: "NNPDF40 datasets", cfac: [QCDEWK]}
-- {dataset: ATLAS_WZ_TOT_13TEV, custom_group: "NNPDF40 datasets", cfac: [NRM, QCD]}
-- {dataset: ATLAS_WP_JET_8TEV_PT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_WM_JET_8TEV_PT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASZPT8TEVMDIST, custom_group: "NNPDF40 datasets", cfac: [QCD], sys: 10}
-- {dataset: ATLASZPT8TEVYDIST, custom_group: "NNPDF40 datasets", cfac: [QCD], sys: 10}
-- {dataset: ATLASTTBARTOT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_TTB_DIFF_8TEV_LJ_TRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_TTB_DIFF_8TEV_LJ_TTRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_TOPDIFF_DILEPT_8TEV_TTRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_1JET_8TEV_R06_DEC, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_2JET_7TEV_R06, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLASPHT15, custom_group: "NNPDF40 datasets", cfac: [QCD, EWK]}
-- {dataset: ATLAS_SINGLETOP_TCH_R_7TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_R_13TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSWEASY840PB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSWMASY47FB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSDY2D11, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSWMU8TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSZDIFF12, custom_group: "NNPDF40 datasets", cfac: [QCD, NRM], sys: 10}
-- {dataset: CMS_2JET_7TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_2JET_3D_8TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSTTBARTOT, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSTOPDIFF8TEVTTRAPNORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMSTTBARTOT5TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_TTBAR_2D_DIFF_MTT_TRAP_NORM, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_TTB_DIFF_13TEV_2016_2L_TRAP, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_TTB_DIFF_13TEV_2016_LJ_TRAP, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_SINGLETOP_TCH_TOT_7TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_SINGLETOP_TCH_R_8TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: CMS_SINGLETOP_TCH_R_13TEV, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCBZ940PB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCBZEE2FB, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCBWZMU7TEV, custom_group: "NNPDF40 datasets", cfac: [NRM, QCD]}
-- {dataset: LHCBWZMU8TEV, custom_group: "NNPDF40 datasets", cfac: [NRM, QCD]}
-- {dataset: LHCB_Z_13TEV_DIMUON, custom_group: "NNPDF40 datasets", cfac: [QCD]}
-- {dataset: LHCB_Z_13TEV_DIELECTRON, custom_group: "NNPDF40 datasets", cfac: [QCD]}
+- {dataset: NMC_NC_NOTFIXED_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: SLAC_NC_NOTFIXED_P_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: SLAC_NC_NOTFIXED_D_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: BCDMS_NC_NOTFIXED_P_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: BCDMS_NC_NOTFIXED_D_DW_EM-F2, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: CHORUS_CC_NOTFIXED_PB_DW_NU-SIGMARED, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: CHORUS_CC_NOTFIXED_PB_DW_NB-SIGMARED, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: NUTEV_CC_NOTFIXED_FE_DW_NU-SIGMARED, custom_group: datasets pre HERA, cfac: [MAS], variant: legacy}
+- {dataset: NUTEV_CC_NOTFIXED_FE_DW_NB-SIGMARED, custom_group: datasets pre HERA, cfac: [MAS], variant: legacy}
+- {dataset: HERA_NC_318GEV_EM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_225GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_251GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_300GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_318GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_CC_318GEV_EM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_CC_318GEV_EP-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_318GEV_EAVG_CHARM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: HERA_NC_318GEV_EAVG_BOTTOM-SIGMARED, custom_group: datasets pre LHC, variant: legacy}
+- {dataset: DYE866_Z0_800GEV_DW_RATIO_PDXSECRATIO, custom_group: datasets pre HERA, variant: legacy}
+- {dataset: DYE866_Z0_800GEV_PXSEC, custom_group: datasets pre HERA, cfac: [QCD], variant: legacy}
+- {dataset: DYE605_Z0_38P8GEV_DW_PXSEC, custom_group: datasets pre HERA, cfac: [QCD], variant: legacy}
+- {dataset: CDF_Z0_1P96TEV_ZRAP, custom_group: datasets pre LHC, cfac: [QCD], variant: legacy}
+- {dataset: D0_Z0_1P96TEV_ZRAP, custom_group: datasets pre LHC, cfac: [QCD], variant: legacy}
+- {dataset: D0_WPWM_1P96TEV_ASY, custom_group: datasets pre LHC, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_DY_7TEV_36PB_ETA, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_7TEV_49FB_HIMASS, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_7TEV_LOMASS_M, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_DY_7TEV_46FB_CC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_7TEV_46FB_CF-Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0_8TEV_HIMASS_M-Y, custom_group: NNPDF40 datasets, cfac: [QCDEWK], variant: legacy}
+- {dataset: ATLAS_DY_13TEV_TOT, custom_group: NNPDF40 datasets, cfac: [NRM, QCD], variant: legacy}
+- {dataset: ATLAS_WJ_8TEV_WP-PT, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_WJ_8TEV_WM-PT, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_Z0J_8TEV_PT-M, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy_10}
+- {dataset: ATLAS_Z0J_8TEV_PT-Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy_10}
+- {dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_13TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YT-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_TTBAR_8TEV_2L_DIF_YTTBAR-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_1JET_8TEV_R06_PTY, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy_decorrelated}
+- {dataset: ATLAS_2JET_7TEV_R06_M12Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_PH_13TEV_XSEC, custom_group: "NNPDF40 datasets", cfac: [QCD, EWK], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_7TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_13TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_7TEV_T-Y-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_7TEV_TBAR-Y-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_8TEV_T-RAP-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: ATLAS_SINGLETOP_8TEV_TBAR-RAP-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_WPWM_7TEV_ELECTRON_ASY, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: CMS_WPWM_7TEV_MUON_ASY, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_Z0_7TEV_DIMUON_2D, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: CMS_WPWM_8TEV_MUON_Y, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_Z0J_8TEV_PT-Y, custom_group: NNPDF40 datasets, cfac: [QCD, NRM], variant: legacy_10}
+- {dataset: CMS_2JET_7TEV_M12Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: CMS_TTBAR_7TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_8TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_13TEV_TOT_X-SEC, custom_group: "NNPDF40 datasets", cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_8TEV_LJ_DIF_YTTBAR-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_5TEV_TOT_X-SEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_8TEV_2L_DIF_MTTBAR-YT-NORM, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_13TEV_2L_DIF_YT, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_TTBAR_13TEV_LJ_2016_DIF_YTTBAR, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_SINGLETOP_7TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_SINGLETOP_8TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: CMS_SINGLETOP_13TEV_TCHANNEL-XSEC, custom_group: NNPDF40 datasets, cfac: [QCD], variant: legacy}
+- {dataset: LHCB_Z0_7TEV_DIELECTRON_Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: LHCB_Z0_8TEV_DIELECTRON_Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: LHCB_DY_7TEV_MUON_Y, custom_group: NNPDF40 datasets, cfac: [NRM, QCD]}
+- {dataset: LHCB_DY_8TEV_MUON_Y, custom_group: NNPDF40 datasets, cfac: [NRM, QCD]}
+- {dataset: LHCB_Z0_13TEV_DIMUON-Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
+- {dataset: LHCB_Z0_13TEV_DIELECTRON-Y, custom_group: NNPDF40 datasets, cfac: [QCD]}
 
 groups:
  - metadata_group: custom_group
diff --git a/validphys2/examples/generate_a_report.yaml b/validphys2/examples/generate_a_report.yaml
index 3fbd20af2b..d656531ff8 100644
--- a/validphys2/examples/generate_a_report.yaml
+++ b/validphys2/examples/generate_a_report.yaml
@@ -3,22 +3,22 @@ meta:
   author: Lazy Person
   keywords: [chi2, replica, distribution, DISonly]
 
-fit: NNPDF31_nnlo_as_0118_DISonly
+fit: NNPDF40_nnlo_low_precision_240916
 
 pdf:
   from_: "fit"
 
-experiments:
+dataset_inputs:
   from_: "fit"
 
-theoryid: 162
+theoryid: 399
 
 use_cuts: "fromfit"
 
 template_text: |
   # Histograms of χ2
   ## DIS only distributions
-  {@experiments::experiment plot_chi2dist@}
+  {@ plot_chi2dist_experiments @}
 
 actions_:
   - report(main=True)
diff --git a/validphys2/examples/spiderplot_dataspecs.yaml b/validphys2/examples/spiderplot_dataspecs.yaml
index ef0e83530b..ec2b7ac134 100644
--- a/validphys2/examples/spiderplot_dataspecs.yaml
+++ b/validphys2/examples/spiderplot_dataspecs.yaml
@@ -3,7 +3,7 @@ meta:
   author: Lazy Person
   keywords: [example, spider]
 
-fit: NNPDF40_nnlo_lowprecision
+fit: NNPDF40_nnlo_low_precision_240916
 
 theory:
   from_: fit
diff --git a/validphys2/src/validphys/commondataparser.py b/validphys2/src/validphys/commondataparser.py
index 3fb3d3f22d..60440869e7 100644
--- a/validphys2/src/validphys/commondataparser.py
+++ b/validphys2/src/validphys/commondataparser.py
@@ -923,7 +923,6 @@ def load_commondata_new(metadata):
         nsys=nsys,
         commondata_table=commondata_table,
         systype_table=systype_table,
-        legacy=False,
         legacy_name=legacy_name,
         kin_variables=metadata.kinematic_coverage,
     )
@@ -949,6 +948,7 @@ def load_commondata(spec):
 
 
 ### Old commondata:
+### All code below this line is deprecated and will be removed
 def load_commondata_old(commondatafile, systypefile, setname):
     """Parse a commondata file and a systype file into a CommonData.
diff --git a/validphys2/src/validphys/coredata.py b/validphys2/src/validphys/coredata.py
index aba4f7d930..20c6ba6fe0 100644
--- a/validphys2/src/validphys/coredata.py
+++ b/validphys2/src/validphys/coredata.py
@@ -296,7 +296,7 @@ class CommonData:
     nsys: int
     commondata_table: pd.DataFrame = dataclasses.field(repr=False)
     systype_table: pd.DataFrame = dataclasses.field(repr=False)
-    legacy: bool
+    legacy: bool = False
     systematics_table: Optional[pd.DataFrame] = dataclasses.field(init=None, repr=False)
     legacy_name: Optional[str] = None
     kin_variables: Optional[list] = None
diff --git a/validphys2/src/validphys/loader.py b/validphys2/src/validphys/loader.py
index 0558edf22d..de52e5cca8 100644
--- a/validphys2/src/validphys/loader.py
+++ b/validphys2/src/validphys/loader.py
@@ -204,6 +204,12 @@ def _use_fit_commondata_old_format_to_new_format(setname, file_path):
     if not file_path.exists():
         raise DataNotFoundError(f"Data for {setname} at {file_path} not found")
 
+    # This function (as well as the loader) is only kept during this first tag to ensure that cuts fromfit
+    # can be used even with old fits... for now
+    log.error(
+        "Note, the function `_use_fit_commondata_old_format_to_new_format` is deprecated and will be removed in future releases"
+    )
+
     # Try loading the data from file_path, using the systypes from there
     # although they are not used
     systypes = next(file_path.parent.glob("systypes/*.dat"))
@@ -312,26 +318,12 @@ def available_ekos(self):
             eko_path.parent.name.split("_")[1] for eko_path in self._theories_path.glob("*/eko.tar")
         }
 
-    @property
-    @functools.lru_cache
-    def _available_old_datasets(self):
-        """Provide all available datasets
-        At the moment this means cominbing the new and olf format datasets
-        """
-        data_str = "DATA_"
-        old_commondata_folder = self.commondata_folder.with_name("commondata")
-        # We filter out the positivity and integrability sets here
-        return {
-            file.stem[len(data_str) :]
-            for file in old_commondata_folder.glob(f'{data_str}*.dat')
-            if not file.stem.startswith((f"{data_str}POS", f"{data_str}INTEG"))
-        }
-
     @property
     @functools.lru_cache
     def available_datasets(self):
         """Provide all available datasets that were available before the new
         commondata was implemented and that have a translation.
+        Returns old names
 
         TODO: This should be substituted by a subset of `implemented_dataset`
         that returns only complete datasets.
@@ -362,7 +354,7 @@ def available_pdfs(self):
 
     @property
     def commondata_folder(self):
-        return self.datapath / 'new_commondata'
+        return self.datapath / 'commondata'
 
     def _use_fit_commondata_old_format_to_old_format(self, basedata, fit):
         """Load pseudodata from a fit where the data was generated in the old format
@@ -386,13 +378,7 @@ def _use_fit_commondata_old_format_to_old_format(self, basedata, fit):
         return data_path
 
     def check_commondata(
-        self,
-        setname,
-        sysnum=None,
-        use_fitcommondata=False,
-        fit=None,
-        variant=None,
-        force_old_format=False,
+        self, setname, sysnum=None, use_fitcommondata=False, fit=None, variant=None
     ):
         """Prepare the commondata files to be loaded.
         A commondata is defined by its name (``setname``) and the variant (``variant``)
         ...
         Any actions trying to requests an old-format commondata from this
         function will log an error message. This error message
         will eventually become an actual error.
""" - datafile = None - metadata_path = None - old_commondata_folder = self.commondata_folder.with_name("commondata") - if use_fitcommondata: if not fit: raise LoadFailedError("Must specify a fit when setting use_fitcommondata") @@ -419,9 +401,7 @@ def check_commondata( # 2. Whether the data was in the old format when it was generated # First, load the base commondata which will be used as container and to check point 1 - basedata = self.check_commondata( - setname, variant=variant, force_old_format=force_old_format, sysnum=sysnum - ) + basedata = self.check_commondata(setname, variant=variant, sysnum=sysnum) # and the possible filename for the new data data_path, unc_path = generate_path_filtered_data(fit.path, setname) @@ -441,81 +421,18 @@ def check_commondata( # Get data folder and observable name and check for existence try: - if not force_old_format: - setfolder, observable_name = setname.rsplit("_", 1) - metadata_path = self.commondata_folder / setfolder / "metadata.yaml" - force_old_format = not metadata_path.exists() + setfolder, observable_name = setname.rsplit("_", 1) except ValueError: - log.warning(f"Error trying to read {setname}, falling back to the old format reader") - force_old_format = True - - if not force_old_format: - # Get the instance of ObservableMetaData - try: - metadata = parse_new_metadata(metadata_path, observable_name, variant=variant) - return CommonDataSpec(setname, metadata) - except ValueError as e: - # Before failure, check whetehr this might be an old dataset - datafile = old_commondata_folder / f"DATA_{setname}.dat" - if not datafile.exists(): - raise e - - force_old_format = True - metadata_path = None - - # Eventually the error log will be replaced by the commented execption - log.error( - f"Trying to read {setname} in the old format. Note that this is deprecated and will be removed in future releases" - ) - - # Everything below is deprecated and will be removed in future releases - if datafile is None: - datafile = old_commondata_folder / f"DATA_{setname}.dat" - - if not datafile.exists(): raise DataNotFoundError( - f"No .dat file found for {setname} and no new data translation found" - ) - - if sysnum is None: - sysnum = 'DEFAULT' - sysfile = old_commondata_folder / "systypes" / f"SYSTYPE_{setname}_{sysnum}.dat" - - if not sysfile.exists(): - raise SysNotFoundError( - "Could not find systype %s for dataset '%s'. File %s does not exist." - % (sysnum, setname, sysfile) + f"Dataset {setname} not found. Is the name correct? Old commondata is no longer accepted" ) + set_path = self.commondata_folder / setfolder + if not set_path.exists(): + raise DataNotFoundError(f"Dataset {setname} not found") - plotfiles = [] - - metadata = peek_commondata_metadata(datafile) - process_plotting_root = old_commondata_folder / f'PLOTTINGTYPE_{metadata.process_type}' - type_plotting = ( - process_plotting_root.with_suffix('.yml'), - process_plotting_root.with_suffix('.yaml'), - ) - - data_plotting_root = old_commondata_folder / f'PLOTTING_{setname}' - - data_plotting = ( - data_plotting_root.with_suffix('.yml'), - data_plotting_root.with_suffix('.yaml'), - ) - # TODO: What do we do when both .yml and .yaml exist? - for tp in (type_plotting, data_plotting): - for p in tp: - if p.exists(): - plotfiles.append(p) - if setname != metadata.name: - raise InconsistentMetaDataError( - f"The name found in the CommonData file, {metadata.name}, did " - f"not match the dataset name, {setname}." 
-            )
-        return CommonDataSpec(
-            setname, metadata, legacy=True, datafile=datafile, sysfile=sysfile, plotfiles=plotfiles
-        )
+        metadata_path = set_path / "metadata.yaml"
+        metadata = parse_new_metadata(metadata_path, observable_name, variant=variant)
+        return CommonDataSpec(setname, metadata)
 
     @functools.lru_cache
     def check_theoryID(self, theoryID):
@@ -750,8 +667,6 @@ def check_dataset(
         if not isinstance(theoryid, TheoryIDSpec):
             theoryid = self.check_theoryID(theoryid)
 
-        theoryno, _ = theoryid
-
         # TODO:
         # The dataset is checked twice, once here
         # and once by config in produce_commondata
@@ -761,21 +676,7 @@ def check_dataset(
             name, sysnum, use_fitcommondata=use_fitcommondata, fit=fit, variant=variant
         )
 
-        if commondata.legacy:
-            if theoryid.is_pineappl():
-                raise LoaderError(
-                    f"Trying to use a new theory with an old commondata format, surely it must be a mistake: {name}"
-                )
-
-            # Old-format commondata that we haven't been able to translate
-            # allows only for the usage of only old-format theories
-            try:
-                fkspec, op = self.check_compound(theoryno, name, cfac)
-            except CompoundNotFound:
-                fkspec = self.check_fktable(theoryno, name, cfac)
-                op = None
-        else:
-            fkspec, op = self._check_theory_old_or_new(theoryid, commondata, cfac)
+        fkspec, op = self._check_theory_old_or_new(theoryid, commondata, cfac)
 
         # Note this is simply for convenience when scripting. The config will
         # construct the actual Cuts object by itself
diff --git a/validphys2/src/validphys/tests/conftest.py b/validphys2/src/validphys/tests/conftest.py
index fd6b0129d1..bad704d781 100644
--- a/validphys2/src/validphys/tests/conftest.py
+++ b/validphys2/src/validphys/tests/conftest.py
@@ -72,11 +72,11 @@ def tmp(tmpdir):
 THEORYID = 162
 THEORYID_NEW = 399
 THEORY_QED = 398
-FIT = "NNPDF40_nnlo_lowprecision"
+FIT = "NNPDF40_nnlo_low_precision_240916"
 FIT_3REPLICAS = "Basic_runcard_3replicas_lowprec_221130"
 FIT_3REPLICAS_DCUTS = "Basic_runcard_3replicas_diffcuts_230221"
-FIT_ITERATED = "NNPDF40_nnlo_low_precision_iterated"
-PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_221130"
+FIT_ITERATED = "NNPDF40_nnlo_low_precision_240916_iterated"
+PSEUDODATA_FIT = "pseudodata_test_fit_n3fit_240916"
 
 base_config = dict(pdf=PDF, use_cuts='nocuts', dataset_inputs=DATA, theoryid=THEORYID_NEW, Q=10)
diff --git a/validphys2/src/validphys/tests/test_loader.py b/validphys2/src/validphys/tests/test_loader.py
index b106d93e98..dead6222d4 100644
--- a/validphys2/src/validphys/tests/test_loader.py
+++ b/validphys2/src/validphys/tests/test_loader.py
@@ -3,6 +3,7 @@
 Test loading utilities.
""" + import os from pathlib import Path import subprocess as sp @@ -12,6 +13,7 @@ from hypothesis.strategies import composite, sampled_from, sets import pytest +from nnpdf_data import legacy_to_new_map from validphys.loader import NNPDF_DIR, FallbackLoader, FitNotFound from validphys.plotoptions.core import get_info, kitable from validphys.tests.conftest import FIT, FIT_3REPLICAS, THEORYID_NEW @@ -32,7 +34,8 @@ def load(self): @composite def commondata_and_cuts(draw): old_name = draw(sampled_from(dss)) - cd = l.check_commondata(old_name, force_old_format=True) + new_name, variant = legacy_to_new_map(old_name) + cd = l.check_commondata(new_name, variant=variant) ndata = cd.metadata.ndata # Get a cut mask with at least one selected datapoint masks = sets(sampled_from(range(ndata)), min_size=1) diff --git a/validphys2/src/validphys/tests/test_pseudodata.py b/validphys2/src/validphys/tests/test_pseudodata.py index 7b4efeb50d..2b0599853d 100644 --- a/validphys2/src/validphys/tests/test_pseudodata.py +++ b/validphys2/src/validphys/tests/test_pseudodata.py @@ -75,7 +75,7 @@ def test_no_savepseudodata(): def test_read_matches_recreate(): reads = API.read_fit_pseudodata(fit=PSEUDODATA_FIT) - recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT, separate_multiplicative=True) + recreates = API.recreate_fit_pseudodata(fit=PSEUDODATA_FIT) for read, recreate in zip(reads, recreates): # We ignore the absolute ordering of the dataframes and just check # that they contain identical elements. diff --git a/validphys2/src/validphys/tests/test_pyfkdata.py b/validphys2/src/validphys/tests/test_pyfkdata.py index 47ab86ef3b..75d5b6727f 100644 --- a/validphys2/src/validphys/tests/test_pyfkdata.py +++ b/validphys2/src/validphys/tests/test_pyfkdata.py @@ -8,41 +8,40 @@ from validphys.fkparser import load_fktable from validphys.loader import FallbackLoader as Loader from validphys.results import PositivityResult, ThPredictionsResult -from validphys.tests.conftest import HESSIAN_PDF, PDF, POSITIVITIES, THEORYID, THEORYID_NEW +from validphys.tests.conftest import DATA, HESSIAN_PDF, PDF, POSITIVITIES, THEORYID, THEORYID_NEW + +DS1 = DATA[2]["dataset"] # hadronic +DS2 = DATA[0]["dataset"] # dis def test_basic_loading(): + """Test the loading of an old theory using directly the legacy name""" l = Loader() # Test both with and without cfactors, and load both DIS and hadronic for cfac in ((), ("QCD",)): - fk = l.check_fktable(setname="ATLASTTBARTOT", theoryID=THEORYID, cfac=cfac) - res = load_fktable(fk) - assert res.ndata == 3 + ds = l.check_dataset(DS1, theoryid=THEORYID_NEW, cfac=cfac) + res = load_fktable(ds.fkspecs[0]) + assert res.ndata == 50 assert isinstance(res.sigma, pd.DataFrame) - fk = l.check_fktable(setname="H1HERAF2B", theoryID=THEORYID, cfac=()) - res = load_fktable(fk) - assert res.ndata == 12 + ds = l.check_dataset(DS2, theoryid=THEORYID_NEW) + res = load_fktable(ds.fkspecs[0]) + assert res.ndata == 292 assert isinstance(res.sigma, pd.DataFrame) - # Check if cfactors for datasets having one entry are correctly parsed - fk = l.check_fktable(setname="CMSTTBARTOT7TEV", theoryID=THEORYID, cfac=("QCD",)) - res = load_fktable(fk) - assert res.ndata == 1 - def test_cuts(): l = Loader() - ds = l.check_dataset("ATLASTTBARTOT", theoryid=THEORYID, cfac=("QCD",)) + ds = l.check_dataset(DS1, theoryid=THEORYID_NEW, cfac=("QCD",), variant="legacy") table = load_fktable(ds.fkspecs[0]) # Check explicit cuts newtable = table.with_cuts([0, 1]) assert set(newtable.sigma.index.get_level_values(0)) == {0, 1} assert 
newtable.ndata == 2 - assert newtable.metadata["GridInfo"].ndata == ds.commondata.ndata + assert table.ndata == ds.commondata.ndata # Check empty cuts assert newtable.with_cuts(None) is newtable # Check loaded cuts - ds = l.check_dataset("H1HERAF2B", theoryid=THEORYID) + ds = l.check_dataset(DS2, theoryid=THEORYID_NEW, variant="legacy") table = load_fktable(ds.fkspecs[0]) newtable = table.with_cuts(ds.cuts) assert len(newtable.sigma.index.get_level_values(0).unique()) == len(ds.cuts.load()) @@ -55,17 +54,20 @@ def test_predictions(pdf_name): l = Loader() pdf = l.check_pdf(pdf_name) datasets = [ - {"name": "ATLASTTBARTOT", "cfac": ("QCD",)}, # cfactors - {"name": "H1HERAF2B"}, # DIS, op: NULL - {"name": "D0ZRAP"}, # op: RATIO - {"name": "D0WEASY"}, # op: ASY - {"name": "CMSWCHARMTOT"}, # op: ADD - {"name": "ATLASWPT31PB"}, # op: SMN - {"name": "DYE906R"}, # op: COM <---- - {"name": "DYE906_D"}, # op: SMT <---- + {"name": DS1, "cfac": ("QCD",)}, # cfactors + {"name": DS2}, # DIS, op: NULL + {"name": "D0_Z0_1P96TEV_ZRAP"}, # op: RATIO + {"name": "D0_WPWM_1P96TEV_ASY"}, # op: ASY + {"name": "CMS_SINGLETOP_7TEV_TCHANNEL-XSEC"}, # op: ADD + # Not included in the light theoryid + # {"name": "DYE906_Z0_120GEV_DW_PDXSECRATIO"}, # op: COM + # Not used in any dataset: + # {"name": "DYE906_D"}, # op: SMT + # {"name": "ATLASWPT31PB"}, # op: SMN ] for daset in datasets: - ds = l.check_dataset(**daset, theoryid=THEORYID) + daset["variant"] = "legacy" + ds = l.check_dataset(**daset, theoryid=THEORYID_NEW) preds = predictions(ds, pdf) core_predictions = ThPredictionsResult.from_convolution(pdf, ds) # Uses rawdata since we want to check all members for which we computed the convolution @@ -94,13 +96,13 @@ def test_positivity(pdf_name): pdf = l.check_pdf(pdf_name) for posset in POSITIVITIES: # Use the loader to load the positivity dataset - ps = l.check_posset(setname=posset, theoryID=THEORYID, postlambda=1e6, rules=()) + ps = l.check_posset(setname=posset, theoryID=THEORYID_NEW, postlambda=1e6, rules=()) preds = predictions(ps, pdf) core_predictions = PositivityResult.from_convolution(pdf, ps) assert_allclose(preds.values, core_predictions.rawdata) # Now do the same with the API api_predictions = API.positivity_predictions_data_result( - theoryid=THEORYID, + theoryid=THEORYID_NEW, use_cuts="internal", pdf=pdf_name, posdataset={"dataset": posset, "maxlambda": 1e6}, @@ -112,11 +114,11 @@ def test_positivity(pdf_name): def test_extended_predictions(): - """Test the python predictions dataframe stasts with MC sets""" + """Test the python predictions dataframe stats with MC sets""" l = Loader() pdf = l.check_pdf(PDF) - had = l.check_dataset("ATLASTTBARTOT", theoryid=THEORYID, cfac=("QCD",)) - dis = l.check_dataset("H1HERAF2B", theoryid=THEORYID) + had = l.check_dataset(DS1, theoryid=THEORYID, cfac=("QCD",), variant="legacy") + dis = l.check_dataset(DS2, theoryid=THEORYID, variant="legacy") dis_all = predictions(dis, pdf).T dis_central = central_predictions(dis, pdf).T assert np.allclose(dis_all.mean().values, dis_central.values) @@ -128,34 +130,3 @@ def test_extended_predictions(): assert np.allclose(had_linear.mean().values, had_central) assert not np.allclose(had_all.mean().values, had_central) assert np.all((had_linear - had_all).std() < had_all.std()) - - -@pytest.mark.parametrize("dataset", ["CMSWMASY47FB", "ATLASWZRAP11CC", "LHCBWZMU7TEV"]) -def test_compare_cf(data_internal_cuts_config, data_internal_cuts_new_theory_config, dataset): - """Loads datasets from the two low-precision theories (one old, one 
new) - and checks that the result is the same despite being read differently""" - config = dict(data_internal_cuts_config) - config_new = dict(data_internal_cuts_new_theory_config) - - pdf = API.pdf(**config) - - dinput = {"dataset": dataset} - config["dataset_input"] = dinput - config_new["dataset_input"] = dinput - - ds_old = API.dataset(**config) - ds_new = API.dataset(**config_new) - res_old = central_predictions(ds_old, pdf) - res_new = central_predictions(ds_new, pdf) - - dinput["cfac"] = ["QCD"] - ds_old_cfac = API.dataset(**config) - ds_new_cfac = API.dataset(**config_new) - - res_old_cfac = central_predictions(ds_old_cfac, pdf) - res_new_cfac = central_predictions(ds_new_cfac, pdf) - - old_cfac = res_old_cfac / res_old - new_cfac = res_new_cfac / res_new - - np.testing.assert_allclose(new_cfac, old_cfac, rtol=1e-4)
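
For porting other runcards and scripts away from the old commondata names, the same two-step path used by the updated `commondata_and_cuts` fixture above applies. The following is a minimal, illustrative sketch (not part of the patch): the `ATLASZPT8TEVMDIST` to `ATLAS_Z0J_8TEV_PT-M` translation with variant `legacy_10` (the old `sys: 10` key folded into the variant) is taken from the future_test_example.yaml hunk above, and any other legacy name listed in `dataset_names.yml` should translate the same way.

    from nnpdf_data import legacy_to_new_map
    from validphys.loader import FallbackLoader

    # Translate a legacy dataset name into the new-format name and variant;
    # an old ``sys`` key, when present, becomes part of the variant
    new_name, variant = legacy_to_new_map("ATLASZPT8TEVMDIST", sys=10)
    print(new_name, variant)  # expected: ATLAS_Z0J_8TEV_PT-M legacy_10

    # After this patch the loader only accepts new-format names; the
    # variant selects the legacy treatment of the systematics
    loader = FallbackLoader()
    cd = loader.check_commondata(new_name, variant=variant)
    print(cd.metadata.ndata)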