
Update to new commondata names - validphys #2074

Merged: 13 commits, Jul 31, 2024
2 changes: 1 addition & 1 deletion doc/sphinx/source/figuresofmerit/index.rst
@@ -155,7 +155,7 @@ HERACOMB_SIGMARED_C dataset to 100 by adding the following to the runcard:
.. code-block:: yaml

dataset_inputs:
- {dataset: HERACOMB_SIGMARED_C, frac: 0.75, weight: 100}
- {dataset: HERA_NC_318GEV_EAVG_CHARM-SIGMARED, frac: 0.75, variant: legacy, weight: 100}


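The effect of the ``weight`` key above can be illustrated with a small sketch: the dataset's χ² contribution to the figure of merit is simply scaled by the weight. This is illustrative only (the function name and shapes are hypothetical, not the validphys implementation):

```python
import numpy as np

def weighted_chi2(residuals, invcov, weight=1.0):
    # chi2 = r^T C^{-1} r, scaled by the runcard `weight` factor
    return weight * (residuals @ invcov @ residuals)

r = np.array([0.5, -0.2])           # data - theory residuals (toy values)
invcov = np.linalg.inv(np.eye(2))   # inverse covariance matrix (toy: identity)

# weight=100 multiplies this dataset's chi2 contribution by 100
print(weighted_chi2(r, invcov, weight=100))
```

A weight of 100, as in the runcard snippet above, makes the minimiser treat the dataset as if it carried 100 times its nominal statistical pull.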
Experimental, validation, and training 𝜒²
24 changes: 12 additions & 12 deletions doc/sphinx/source/n3fit/hyperopt.rst
@@ -336,15 +336,15 @@ hyperopt configuration dictionary).
.. code-block:: yaml

dataset_inputs:
- {dataset: NMCPD_dw_ite, frac: 0.75}
- {dataset: NMC, frac: 0.75}
- {dataset: SLACP_dwsh, frac: 0.75}
- {dataset: SLACD_dw_ite, frac: 0.75}
- {dataset: BCDMSP_dwsh, frac: 0.75}
- {dataset: BCDMSD_dw_ite, frac: 0.75}
- {dataset: HERACOMBNCEP575, frac: 0.75}
- {dataset: HERACOMBCCEM, frac: 0.75}
- {dataset: HERACOMBCCEP, frac: 0.75}
- {dataset: NMC_NC_NOTFIXED_DW_EM-F2, frac: 0.75, variant: legacy}
- {dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, frac: 0.75, variant: legacy}
- {dataset: SLAC_NC_NOTFIXED_P_DW_EM-F2, frac: 0.75, variant: legacy}
- {dataset: SLAC_NC_NOTFIXED_D_DW_EM-F2, frac: 0.75, variant: legacy}
- {dataset: BCDMS_NC_NOTFIXED_P_DW_EM-F2, frac: 0.75, variant: legacy}
- {dataset: BCDMS_NC_NOTFIXED_D_DW_EM-F2, frac: 0.75, variant: legacy}
- {dataset: HERA_NC_251GEV_EP-SIGMARED, frac: 0.75, variant: legacy}
- {dataset: HERA_CC_318GEV_EM-SIGMARED, frac: 0.75, variant: legacy}
- {dataset: HERA_CC_318GEV_EP-SIGMARED, frac: 0.75, variant: legacy}

hyperscan_config:
use_tries_from: 210508-hyperopt_for_paper
@@ -353,9 +353,9 @@ hyperopt configuration dictionary).
target: fit_future_tests
partitions:
- datasets:
- HERACOMBCCEP
- HERACOMBCCEM
- HERACOMBNCEP575
- HERA_CC_318GEV_EP-SIGMARED
- HERA_CC_318GEV_EM-SIGMARED
- HERA_NC_251GEV_EP-SIGMARED
- datasets:

parallel_models: true
10 changes: 5 additions & 5 deletions doc/sphinx/source/n3fit/runcard_detailed.rst
@@ -26,9 +26,9 @@ The first thing one finds when building a fit runcard for
.. code-block:: yaml

dataset_inputs:
- { dataset: SLACP_dwsh, frac: 0.5}
- { dataset: NMCPD_dw, frac: 0.5 }
- { dataset: ATLASZPT8TEVMDIST, frac: 0.75, sys: 10, cfac: [QCD] }
- { dataset: SLAC_NC_NOTFIXED_P_DW_EM-F2, frac: 0.5, variant: legacy}
- { dataset: NMC_NC_NOTFIXED_DW_EM-F2, frac: 0.5, variant: legacy }
- { dataset: ATLAS_Z0J_8TEV_PT-M, frac: 0.75, variant: legacy_10}


The `dataset_inputs` key contains a list of dictionaries defining the datasets
@@ -41,8 +41,8 @@ The fraction of events that are considered for the training and validation sets
.. code-block:: yaml

dataset_inputs:
- { dataset: SLACP_dwsh, frac: 0.75}

- { dataset: SLAC_NC_NOTFIXED_P_DW_EM-F2, frac: 0.75, variant: legacy}

It is possible to run a fit with no validation set by setting the fraction to ``1.0``, in which case the training set will also be used as the validation set.

The random seed for the training/validation split is defined by the variable ``trvlseed``.
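What ``frac`` and ``trvlseed`` control can be sketched as follows (a hypothetical helper for illustration, not n3fit's actual splitting code):

```python
import numpy as np

def trvl_split(ndata, frac=0.75, trvlseed=0):
    # Shuffle data indices reproducibly with the given seed,
    # then assign the first `frac` of them to training
    rng = np.random.default_rng(trvlseed)
    idx = rng.permutation(ndata)
    ntr = int(frac * ndata)
    return np.sort(idx[:ntr]), np.sort(idx[ntr:])

tr, vl = trvl_split(100, frac=0.75, trvlseed=42)
print(len(tr), len(vl))  # 75 25
```

Re-running with the same ``trvlseed`` reproduces the same split, which is what makes fits with a fixed seed deterministic in this respect.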
18 changes: 9 additions & 9 deletions doc/sphinx/source/vp/complex_runcards.rst
@@ -24,7 +24,7 @@ We start with the following simple example:
use_cuts: "internal"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
@@ -65,7 +65,7 @@ In this case we can modify the example as follows:
use_cuts: "nocuts"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
@@ -104,7 +104,7 @@ respect to the outer. For example, if we add a PDF specification to the
use_cuts: "nocuts"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
@@ -140,7 +140,7 @@ result for each. For example:
- use_cuts: "nocuts"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
@@ -177,7 +177,7 @@ something like this:
use_cuts: "nocuts"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
@@ -199,7 +199,7 @@ specifying that we want to loop over `pdfs`:
use_cuts: "nocuts"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
@@ -245,9 +245,9 @@ Consider the example:
use_cuts : "nocuts"

dataset_inputs:
- { dataset: LHCBWZMU7TEV, cfac: [NRM] }
- { dataset: LHCBWZMU8TEV, cfac: [NRM] }
- { dataset: ATLASWZRAP36PB }
- { dataset: LHCB_DY_7TEV_MUON_Y, cfac: [NRM] }
- { dataset: LHCB_Z0_8TEV_MUON_Y, cfac: [NRM] }
- { dataset: ATLAS_DY_7TEV_36PB_ETA }

actions_:
- With_cuts::theoryids::pdfs::dataset_inputs plot_fancy
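Namespaces like ``With_cuts`` in these runcards resolve inner-before-outer: a key defined in the inner namespace shadows the same key outside it. That lookup order can be mimicked with Python's ``collections.ChainMap`` (a sketch of the semantics only, not validphys internals; the dictionary contents are made up):

```python
from collections import ChainMap

outer = {"use_cuts": "nocuts", "pdf": "outer_pdf"}
inner = {"pdf": "inner_pdf"}   # e.g. a With_cuts-style namespace

ns = ChainMap(inner, outer)    # inner mapping is searched first

print(ns["pdf"])       # inner_pdf  (inner value shadows the outer one)
print(ns["use_cuts"])  # nocuts    (falls through to the outer namespace)
```

Keys not found in the inner namespace fall through to the enclosing one, which is exactly the behaviour the runcard examples above rely on.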
26 changes: 13 additions & 13 deletions doc/sphinx/source/vp/cuts.md
@@ -129,21 +129,21 @@ meta:
show_total: True

NNLODatasts: &NNLODatasts
- {dataset: ATLAS_SINGLETOP_TCH_R_7TEV, frac: 1.0, cfac: [QCD]} # N
- {dataset: ATLAS_SINGLETOP_TCH_R_13TEV, frac: 1.0, cfac: [QCD]} # N
- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM, frac: 1.0, cfac: [QCD]} # N
- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM, frac: 1.0, cfac: [QCD]} # N
- {dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM, frac: 0.75, cfac: [QCD]} # N
- {dataset: ATLAS_SINGLETOP_7TEV_TCHANNEL-XSEC, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_13TEV_TCHANNEL-XSEC, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_7TEV_T-Y-NORM, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_7TEV_TBAR-Y-NORM, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_8TEV_T-RAP-NORM, frac: 0.75, variant: legacy} # N

NLODatasts: &NLODatasts
- {dataset: ATLAS_SINGLETOP_TCH_R_7TEV, frac: 1.0, cfac: []} # N
- {dataset: ATLAS_SINGLETOP_TCH_R_13TEV, frac: 1.0, cfac: []} # N
- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_T_RAP_NORM, frac: 1.0, cfac: []} # N
- {dataset: ATLAS_SINGLETOP_TCH_DIFF_7TEV_TBAR_RAP_NORM, frac: 1.0, cfac: []} # N
- {dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_T_RAP_NORM, frac: 0.75, cfac: []} # N
- {dataset: ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM, frac: 0.75, cfac: []} # N

do_not_require_similarity_for: [ATLAS_SINGLETOP_TCH_DIFF_8TEV_TBAR_RAP_NORM]
- {dataset: ATLAS_SINGLETOP_7TEV_TCHANNEL-XSEC, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_13TEV_TCHANNEL-XSEC, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_7TEV_T-Y-NORM, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_7TEV_TBAR-Y-NORM, frac: 1.0, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_8TEV_T-RAP-NORM, frac: 0.75, variant: legacy} # N
- {dataset: ATLAS_SINGLETOP_8TEV_TBAR-RAP-NORM, frac: 0.75, variant: legacy} # N

do_not_require_similarity_for: [ATLAS_SINGLETOP_8TEV_TBAR-RAP-NORM]


dataset_inputs: *NLODatasts
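The ``&NLODatasts`` / ``*NLODatasts`` syntax above is standard YAML anchors and aliases: the anchor names a node and the alias reuses it verbatim. A minimal sketch with PyYAML (assuming the ``pyyaml`` package is available; the dataset entry is just one line taken from the list above):

```python
import yaml

doc = """
NLODatasts: &NLODatasts
  - {dataset: ATLAS_SINGLETOP_7TEV_TCHANNEL-XSEC, frac: 1.0, variant: legacy}

dataset_inputs: *NLODatasts
"""

cfg = yaml.safe_load(doc)
# The alias expands to the same list the anchor defined
print(cfg["dataset_inputs"][0]["dataset"])
```

This is why the runcard can define the NLO and NNLO dataset lists once and plug either into ``dataset_inputs`` without duplication.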
62 changes: 31 additions & 31 deletions doc/sphinx/source/vp/dataspecification.rst
@@ -35,11 +35,11 @@ Here is an example dataset input:
.. code:: yaml

dataset_input:
dataset: CMSZDIFF12
cfac: [QCD,NRM]
sys: 10
dataset: CMS_Z0J_8TEV_PT-Y
cfac: [NRM]
variant: legacy_10

This particular example is for the ``CMSZDIFF12`` dataset, the user has
This particular example is for the ``CMS_Z0J_8TEV_PT-Y`` dataset; the user has
specified to use the C-factors given by ``cfac`` as well as ``variant: legacy_10``, which
corresponds to an additional contribution to the covariance matrix accounting for
statistical fluctuations in the C-factors. These settings correspond to NNLO
@@ -53,7 +53,7 @@ environment using the settings above

>>> from validphys.api import API
>>> ds_spec = API.dataset(
... dataset_input={"dataset": "CMSZDIFF12", "cfac": ["QCD", "NRM"], "sys": 10},
... dataset_input={"dataset": "CMS_Z0J_8TEV_PT-Y", "cfac": ["NRM"], "variant": "legacy_10"},
... use_cuts="internal",
... theoryid=53
... )
@@ -84,7 +84,7 @@ The ``DataSetSpec`` contains all of the information used to construct it, e.g.
>>> ds_spec.thspec
TheoryIDSpec(id=53, path=PosixPath('/Users/michael/conda/envs/nnpdf-dev/share/NNPDF/data/theory_53'))
>>> ds_spec.name
'CMSZDIFF12'
'CMS_Z0J_8TEV_PT-Y'

but also importantly has a ``load_commondata`` method, which returns an instance of the
``CommonData``. This new object contains numpy arrays of data central values and experimental covariance
@@ -109,7 +109,7 @@ specify a PDF
.. code:: python

>>> results = API.results(
... dataset_input={"dataset": "CMSZDIFF12", "cfac": ["QCD", "NRM"], "sys": 10},
... dataset_input={"dataset": "CMS_Z0J_8TEV_PT-Y", "cfac": ["NRM"], "variant": "legacy_10"},
... use_cuts="internal",
... theoryid=53,
... pdf="NNPDF31_nnlo_as_0118"
@@ -147,9 +147,9 @@ example:
.. code:: yaml

dataset_inputs:
- { dataset: NMC }
- { dataset: ATLASTTBARTOT, cfac: [QCD] }
- { dataset: CMSZDIFF12, cfac: [QCD,NRM], sys: 10 }
- { dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, variant: legacy }
- { dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, variant: legacy}
- { dataset: CMS_Z0J_8TEV_PT-Y, cfac: [NRM], variant: legacy_10 }

We see that multiple datasets are specified as a flat list, i.e. that there is no
hierarchy to the datasets which splits them into experiments or process types.
@@ -170,15 +170,15 @@ for that specific group e.g:

>>> API.group_dataset_inputs_by_metadata(
... dataset_inputs=[
... {"dataset":"NMC"},
... {"dataset": "ATLASTTBARTOT", "cfac": ["QCD"]},
... {"dataset": "CMSZDIFF12", "cfac": ["QCD","NRM"], "sys": 10 }],
... {"dataset":"NMC_NC_NOTFIXED_P_EM-SIGMARED", "variant": "legacy"},
... {"dataset": "ATLAS_TTBAR_7TEV_TOT_X-SEC", "variant": "legacy"},
... {"dataset": "CMS_Z0J_8TEV_PT-Y", "cfac": ["NRM"], "variant": "legacy_10" }],
... metadata_group="experiment"
... )
[
{'data_input': [DataSetInput(name='NMC', sys=None, cfac=(), frac=1, weight=1)], 'group_name': 'NMC'},
{'data_input': [DataSetInput(name='ATLASTTBARTOT', sys=None, cfac=['QCD'], frac=1, weight=1)], 'group_name': 'ATLAS'},
{'data_input': [DataSetInput(name='CMSZDIFF12', sys=10, cfac=['QCD', 'NRM'], frac=1, weight=1)], 'group_name': 'CMS'}
{'data_input': [DataSetInput(name='NMC_NC_NOTFIXED_P_EM-SIGMARED', sys=None, cfac=(), frac=1, weight=1, custom_group='unset', variant='legacy')],'group_name': 'NMC'},
{'data_input': [DataSetInput(name='ATLAS_TTBAR_7TEV_TOT_X-SEC', sys=None, cfac=(), frac=1, weight=1, custom_group='unset', variant='legacy')],'group_name': 'ATLAS'}
{'data_input': [DataSetInput(name='CMS_Z0J_8TEV_PT-Y', sys=None, cfac=['NRM'], frac=1, weight=1, custom_group='unset', variant='legacy_10')],'group_name': 'CMS'}
]

Here we see that the namespace key is ``data_input`` rather than
@@ -198,9 +198,9 @@ containing the 𝞆² of the specified datasets, grouped by ``experiment``:
.. code:: yaml

dataset_inputs:
- { dataset: NMC }
- { dataset: ATLASTTBARTOT, cfac: [QCD] }
- { dataset: CMSZDIFF12, cfac: [QCD,NRM], sys: 10 }
- { dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, variant: legacy }
- { dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, variant: legacy }
- { dataset: CMS_Z0J_8TEV_PT-Y, cfac: [NRM], variant: legacy_10}

theoryid: 53

@@ -220,9 +220,9 @@ If we specify a ``metadata_group`` in the runcard, like so
metadata_group: nnpdf31_process

dataset_inputs:
- { dataset: NMC }
- { dataset: ATLASTTBARTOT, cfac: [QCD] }
- { dataset: CMSZDIFF12, cfac: [QCD,NRM], sys: 10 }
- { dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, variant: legacy }
- { dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, variant: legacy }
- { dataset: CMS_Z0J_8TEV_PT-Y, cfac: [NRM], variant: legacy_10}

theoryid: 53

@@ -270,13 +270,13 @@ the ``custom_group`` key to each dataset_input as follows
metadata_group: custom_group

dataset_inputs:
- { dataset: NMC, custom_group: traca }
- { dataset: NMCPD, custom_group: traco }
- { dataset: LHCBWZMU7TEV, cfac: [NRM], custom_group: pepe }
- { dataset: LHCBWZMU8TEV, cfac: [NRM], custom_group: pepa }
- { dataset: ATLASWZRAP36PB}
- { dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, variant: legacy, custom_group: traca }
- { dataset: NMC_NC_NOTFIXED_EM-F2, variant: legacy, custom_group: traco }
- { dataset: LHCB_DY_7TEV_MUON_Y, cfac: [NRM], custom_group: pepe }
- { dataset: LHCB_DY_8TEV_MUON_Y, cfac: [NRM], custom_group: pepa }
- { dataset: ATLAS_DY_7TEV_36PB_ETA, variant: legacy}

Note that we didn't set any group for ``ATLASWZRAP36PB``, but that's ok: any
Note that we didn't set any group for ``ATLAS_DY_7TEV_36PB_ETA``, but that's ok: any
datasets which are not explicitly given a ``custom_group`` get put into the
``unset`` group.
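The fall-back to the ``unset`` group can be sketched as follows (illustrative grouping only, not the validphys implementation):

```python
from collections import defaultdict

dataset_inputs = [
    {"dataset": "NMC_NC_NOTFIXED_P_EM-SIGMARED", "custom_group": "traca"},
    {"dataset": "ATLAS_DY_7TEV_36PB_ETA"},  # no custom_group given
]

groups = defaultdict(list)
for d in dataset_inputs:
    # Datasets without an explicit custom_group land in "unset"
    groups[d.get("custom_group", "unset")].append(d["dataset"])

print(dict(groups))
```

Any number of datasets may share a group name, and the resulting groups then behave like the metadata-derived groupings shown earlier.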

@@ -335,13 +335,13 @@ input
experiments:
- experiment: NMC
datasets:
- { dataset: NMC }
- { dataset: NMC_NC_NOTFIXED_P_EM-SIGMARED, variant: legacy }
- experiment: ATLAS
datasets:
- { dataset: ATLASTTBARTOT, cfac: [QCD] }
- { dataset: ATLAS_TTBAR_7TEV_TOT_X-SEC, variant: legacy }
- experiment: CMS
datasets:
- { dataset: CMSZDIFF12, cfac: [QCD,NRM], sys: 10 }
- { dataset: CMS_Z0J_8TEV_PT-Y, cfac: [NRM], variant: legacy_10 }

theoryid: 53

6 changes: 3 additions & 3 deletions doc/sphinx/source/vp/design.rst
@@ -240,12 +240,12 @@ runcard:
experiments:
- experiment: LHCb
datasets:
- { dataset: LHCBWZMU7TEV, cfac: [NRM] }
- { dataset: LHCBWZMU8TEV, cfac: [NRM] }
- { dataset: LHCB_DY_7TEV_MUON_Y, cfac: [NRM], variant: legacy }
- { dataset: LHCB_DY_8TEV_MUON_Y, cfac: [NRM], variant: legacy }

- experiment: ATLAS
datasets:
- { dataset: ATLASWZRAP36PB}
- { dataset: ATLAS_DY_7TEV_36PB_ETA, variant: legacy }

actions_:
- theoryids::pdfs::experiments::experiment plot_fancy
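An action spelled ``theoryids::pdfs::experiments::experiment plot_fancy`` runs once per combination of the looped namespaces, analogous to a Cartesian product (a sketch of the loop expansion only, not the actual resolver; the list contents are made up):

```python
from itertools import product

theoryids = [53, 162]
pdfs = ["NNPDF40_nnlo_as_01180"]
experiments = ["LHCb", "ATLAS"]

# One plot_fancy invocation per (theoryid, pdf, experiment) combination
combos = list(product(theoryids, pdfs, experiments))
print(len(combos))  # 4
```

Adding an entry to any of the looped lists multiplies the number of produced plots accordingly.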
2 changes: 1 addition & 1 deletion doc/sphinx/source/vp/download.md
@@ -32,7 +32,7 @@ theoryid: 208
use_cuts: "fromfit"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
2 changes: 1 addition & 1 deletion doc/sphinx/source/vp/getting-started.rst
@@ -23,7 +23,7 @@ A simple example is:
use_cuts: "internal"

dataset_input:
dataset: ATLASWZRAP36PB
dataset: ATLAS_DY_7TEV_36PB_ETA
cfac: [EWK]

actions_:
4 changes: 2 additions & 2 deletions validphys2/examples/API_example.ipynb
@@ -30,7 +30,7 @@
"source": [
"l = Loader()\n",
"thno = 399\n",
"dsname = 'ATLAS_WZ_TOT_13TEV'\n",
"dsname = 'ATLAS_DY_13TEV_TOT'\n",
"cfactors = ['NRM', 'QCD']\n",
"pdfs = ['NNPDF40_nnlo_lowprecision', 'NNPDF40_nnlo_as_01180']\n",
"inp = {\n",
@@ -400,7 +400,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
"version": "3.12.3"
}
},
"nbformat": 4,
13 changes: 5 additions & 8 deletions validphys2/examples/chi2_tables_montecarlo_hessian.yaml
@@ -1,15 +1,13 @@
meta:
title: Chi2 with Monte Carlo and Hessian sets
author: Mark N. Costantini
keywords: [example]
keywords: [chi2]

dataset_inputs:
- {dataset: SLACP_dwsh, frac: 0.75}
- {dataset: HERACOMBNCEM, frac: 0.75}
- {dataset: HERACOMBNCEP460, frac: 0.75}
- {dataset: DYE605_dw_ite, frac: 0.75, cfac: [QCD]}


- {dataset: SLAC_NC_NOTFIXED_P_DW_EM-F2, variant: legacy}
- {dataset: HERA_NC_318GEV_EM-SIGMARED, variant: legacy}
- {dataset: HERA_NC_225GEV_EP-SIGMARED, variant: legacy}
- {dataset: DYE605_Z0_38P8GEV_DW_PXSEC, variant: legacy}

# used to add the PDF covariance matrix to the experimental one
use_pdferr: True
@@ -26,7 +24,6 @@ pdfs:
- CT18NNLO
- ABMP16_5_nnlo


template_text: |

Chi2 Report