Merge branch 'master' into check_newcd
Radonirinaunimi committed Jul 17, 2024
2 parents 5aa3c9d + e3f7086, commit b4d7b13
Showing 227 changed files with 29,135 additions and 638 deletions.
50 changes: 36 additions & 14 deletions n3fit/runcards/examples/nnpdf40-like.yml
@@ -2,7 +2,7 @@
# Configuration file for n3fit
#
################################################################################
- description: NNPDF4.0 NNLO baseline fit (nFONLL). Comparable to NNPDF40_nnlo_as_01180_qcd
+ description: NNLO baseline fit, NNPDF4.0 dataset

################################################################################
dataset_inputs:
@@ -91,7 +91,7 @@ dataset_inputs:

################################################################################
datacuts:
- t0pdfset: NNPDF40_nnlo_as_01180
+ t0pdfset: 240701-02-rs-nnpdf40-baseline
q2min: 3.49
w2min: 12.5

@@ -136,34 +136,56 @@ fitting:
fitbasis: EVOL
savepseudodata: True
basis:
- - {fl: sng, trainable: false, smallx: [1.091, 1.119], largex: [1.471, 3.021]}
- - {fl: g, trainable: false, smallx: [0.7795, 1.095], largex: [2.742, 5.547]}
- - {fl: v, trainable: false, smallx: [0.472, 0.7576], largex: [1.571, 3.559]}
- - {fl: v3, trainable: false, smallx: [0.07483, 0.4501], largex: [1.714, 3.467]}
- - {fl: v8, trainable: false, smallx: [0.5731, 0.779], largex: [1.555, 3.465]}
- - {fl: t3, trainable: false, smallx: [-0.5498, 1.0], largex: [1.778, 3.5]}
- - {fl: t8, trainable: false, smallx: [0.5469, 0.857], largex: [1.555, 3.391]}
- - {fl: t15, trainable: false, smallx: [1.081, 1.142], largex: [1.491, 3.092]}
+ - {fl: sng, trainable: false, smallx: [1.089, 1.119], largex: [1.475, 3.119]}
+ - {fl: g, trainable: false, smallx: [0.7504, 1.098], largex: [2.814, 5.669]}
+ - {fl: v, trainable: false, smallx: [0.479, 0.7384], largex: [1.549, 3.532]}
+ - {fl: v3, trainable: false, smallx: [0.1073, 0.4397], largex: [1.733, 3.458]}
+ - {fl: v8, trainable: false, smallx: [0.5507, 0.7837], largex: [1.516, 3.356]}
+ - {fl: t3, trainable: false, smallx: [-0.4506, 0.9305], largex: [1.745, 3.424]}
+ - {fl: t8, trainable: false, smallx: [0.5877, 0.8687], largex: [1.522, 3.515]}
+ - {fl: t15, trainable: false, smallx: [1.089, 1.141], largex: [1.492, 3.222]}

################################################################################
positivity:
posdatasets:
- - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6} # Positivity Lagrange Multiplier
+ # Positivity Lagrange Multiplier
+ - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_F2D, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_F2S, maxlambda: 1e6}
- - {dataset: NNPDF_POS_2P24GEV_FLL-19PTS, maxlambda: 1e6}
+ - {dataset: NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_DYU, maxlambda: 1e10}
- {dataset: NNPDF_POS_2P24GEV_DYD, maxlambda: 1e10}
- {dataset: NNPDF_POS_2P24GEV_DYS, maxlambda: 1e10}
- - {dataset: NNPDF_POS_2P24GEV_F2C-17PTS, maxlambda: 1e6}
- - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6} # Positivity of MSbar PDFs
+ - {dataset: NNPDF_POS_2P24GEV_F2C, maxlambda: 1e6}
+ # Positivity of MSbar PDFs
+ - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XUB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XDQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XDB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XSQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XSB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6}

+ added_filter_rules:
+ - dataset: NNPDF_POS_2P24GEV_FLL
+   rule: "x > 5.0e-7"
+ - dataset: NNPDF_POS_2P24GEV_F2C
+   rule: "x < 0.74"
+ - dataset: NNPDF_POS_2P24GEV_XGL
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XUQ
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XUB
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XDQ
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XDB
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XSQ
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XSB
+   rule: "x > 0.1"

integrability:
integdatasets:
- {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2}
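
The added_filter_rules block above applies per-point kinematic cuts: each rule is a boolean expression evaluated on every data point of the named dataset, and points that fail it are cut. A minimal sketch of the idea, with toy points invented for illustration (this is not the actual validphys rule engine):

points = [{"x": 1.0e-8}, {"x": 1.0e-3}, {"x": 0.2}]
rule = "x > 5.0e-7"  # same syntax as the FLL rule above

# Keep only the points for which the expression evaluates to True.
kept = [p for p in points if eval(rule, {}, p)]
print(kept)  # -> [{'x': 0.001}, {'x': 0.2}]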
90 changes: 90 additions & 0 deletions nnpdf_data/nnpdf_data/filter_utils/correlations.py
@@ -0,0 +1,90 @@
import numpy as np
from numpy.linalg import eig


def upper_triangular_to_symmetric(ut, dim):
"""Build a symmetric matrix from the upper diagonal"""
corr = np.zeros((dim, dim))
last = dim
first = 0
for i in range(dim):
corr[i, i:] = ut[first:last]
last += dim - i - 1
first += dim - i
# Mirror the upper triangle so the result is symmetric, as the docstring states
return corr + corr.T - np.diag(np.diag(corr))


def compute_covmat(corrmat: np.ndarray, unc: np.ndarray, ndata: int) -> list:
"""Compute the covariance matrix with the artificial stat uncertainties."""
# multiply by stat err
cov_mat = np.einsum("i,ij,j->ij", unc, corrmat, unc)
return covmat_to_artunc(ndata, cov_mat.flatten().tolist())


def covmat_to_artunc(ndata, covmat_list, no_of_norm_mat=0):
r"""Convert the covariance matrix to a matrix of
artificial uncertainties.
NOTE: This function has been taken from validphys.newcommondata_utils.
If those utils get merged in the future, we can replace this.
Parameters
----------
ndata : integer
Number of data points
covmat_list : list
A one dimensional list which contains the elements of
the covariance matrix row by row. Since experimental
datasets provide these matrices in a list form, this
simplifies the implementation for the user.
no_of_norm_mat : int
Normalized covariance matrices may have an eigenvalue
of 0 due to the last data point not being linearly
independent. To allow for this, the user should input
the number of normalized matrices that are being treated
in an instance. For example, if a single covariance matrix
of a normalized distribution is being processed, the input
would be 1. If a covariance matrix pertains to
3 normalized datasets (i.e. cross covmat for 3
distributions), the input would be 3. The default value is
0 for when the covariance matrix pertains to an absolute
distribution.
Returns
-------
artunc : list
A two dimensional matrix (given as a list of lists)
which contains artificial uncertainties to be added
to the commondata. i^th row (or list) contains the
artificial uncertainties of the i^th data point.
"""
epsilon = -1e-10
neg_eval_count = 0
psd_check = True
covmat = np.zeros((ndata, ndata))
artunc = np.zeros((ndata, ndata))
for i in range(len(covmat_list)):
a = i // ndata
b = i % ndata
covmat[a][b] = covmat_list[i]
eigval, eigvec = eig(covmat)
for j in range(len(eigval)):
if eigval[j] < epsilon:
psd_check = False
elif eigval[j] > epsilon and eigval[j] <= 0:
neg_eval_count = neg_eval_count + 1
if neg_eval_count == (no_of_norm_mat + 1):
psd_check = False
elif eigval[j] > 0:
continue
if not psd_check:
raise ValueError("The covariance matrix is not positive-semidefinite")
else:
for i in range(ndata):
for j in range(ndata):
if eigval[j] < 0:
continue
else:
artunc[i][j] = eigvec[i][j] * np.sqrt(eigval[j])
return artunc.tolist()
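
A minimal usage sketch for these helpers on a toy three-point case; all numbers are invented for illustration, and it assumes upper_triangular_to_symmetric returns the fully mirrored matrix:

import numpy as np

from nnpdf_data.filter_utils.correlations import (
    compute_covmat,
    upper_triangular_to_symmetric,
)

ndata = 3
# Upper triangle of a correlation matrix, row by row, diagonal included.
ut = [1.0, 0.2, 0.1, 1.0, 0.3, 1.0]
corr = upper_triangular_to_symmetric(ut, ndata)

stat = np.array([0.5, 0.4, 0.6])  # statistical uncertainty of each point
art_unc = np.array(compute_covmat(corr, stat, ndata))

# Row i of art_unc holds the artificial systematics of point i; together
# they reproduce the covariance matrix C_ij = stat_i * corr_ij * stat_j.
cov = np.einsum("i,ij,j->ij", stat, corr, stat)
assert np.allclose(art_unc @ art_unc.T, cov)

Since covmat_to_artunc keeps only non-negative eigendirections, this reconstruction is exact for a positive-definite covariance matrix.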
27 changes: 27 additions & 0 deletions nnpdf_data/nnpdf_data/filter_utils/uncertainties.py
@@ -0,0 +1,27 @@

import numpy as np

def symmetrize_errors(delta_plus, delta_minus):
r"""Compute the symmetrized uncertainty and the shift in data point.
Parameters
----------
delta_plus : float
The top/plus uncertainty with sign
delta_minus : float
The bottom/minus uncertainty with sign
Returns
-------
se_delta : float
The value to be added to the data point
se_sigma : float
The symmetrized uncertainty to be used in commondata
"""
semi_diff = (delta_plus + delta_minus) / 2
average = (delta_plus - delta_minus) / 2
se_delta = semi_diff
se_sigma = np.sqrt(average * average + 2 * semi_diff * semi_diff)
return se_delta, se_sigma
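
A short usage sketch with toy numbers: for an asymmetric measurement such as 5.0 +0.3/-0.5, the helper returns the shift of the central value and the symmetrized error.

from nnpdf_data.filter_utils.uncertainties import symmetrize_errors

# Signs as provided by the experiment: plus error positive, minus error negative.
shift, sigma = symmetrize_errors(delta_plus=0.3, delta_minus=-0.5)

central = 5.0 + shift  # the data point moves by the semi-difference, here -0.1
print(f"{central:.3f} +/- {sigma:.3f}")  # -> 4.900 +/- 0.424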

@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - E155_NC_9GEV_EN_G1
- operation: 'null'
+ - - E155_NC_9GEV_EN_F1
+ operation: 'ratio'
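
With this change the observable becomes the ratio g1/F1 rather than g1 alone: the second FK-table group is added and the operation switches from 'null' to 'ratio'. The resulting theory block, reconstructed from the hunk above assuming the file's standard two-space indentation, reads:

theory:
  FK_tables:
  - - E155_NC_9GEV_EN_G1
  - - E155_NC_9GEV_EN_F1
  operation: 'ratio'

The same switch is applied in the remaining polarised-DIS metadata files of this commit.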
@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - E155_NC_9GEV_EP_G1
- operation: 'null'
+ - - E155_NC_9GEV_EP_F1
+ operation: 'ratio'
@@ -9,8 +9,7 @@
HERE = pathlib.Path(__file__).parent
sys.path = [str(HERE.parent / "HERMES_NC_7GEV_EP")] + sys.path

- from filter import compute_covmat
-
+ from nnpdf_data.filter_utils.correlations import compute_covmat

def read_data(fnames):
df = pd.DataFrame()
@@ -81,11 +80,9 @@ def write_data(df):
# Extract the correlation matrix and compute artificial systematics
ndata_points = len(data_central)
corrmatrix = read_corrmatrix(nb_datapoints=ndata_points)
- # Compute the covariance matrix
- compute_covmat(corrmatrix, df, ndata_points)

# Compute the covariance matrix
- art_sys = compute_covmat(corrmatrix, df, ndata_points)
+ art_sys = compute_covmat(corrmatrix, df['stat'], ndata_points)

error = []
for i in range(ndata_points):
84 changes: 2 additions & 82 deletions nnpdf_data/nnpdf_data/new_commondata/HERMES_NC_7GEV_EP/filter.py
@@ -2,10 +2,10 @@
import pathlib

import numpy as np
- from numpy.linalg import eig
import pandas as pd
import yaml

+ from nnpdf_data.filter_utils.correlations import compute_covmat

def read_data(fnames):
df = pd.DataFrame()
@@ -49,84 +49,6 @@ def read_corrmatrix(nb_datapoints: int = 15) -> np.ndarray:

return df_corrs.value.values.reshape((nb_datapoints, nb_datapoints))


def covmat_to_artunc(ndata, covmat_list, no_of_norm_mat=0):
r"""Convert the covariance matrix to a matrix of
artificial uncertainties.
NOTE: This function has been taken from validphys.newcommondata_utils.
If those utils get merged in the future, we can replace this.
Parameters
----------
ndata : integer
Number of data points
covmat_list : list
A one dimensional list which contains the elements of
the covariance matrix row by row. Since experimental
datasets provide these matrices in a list form, this
simplifies the implementation for the user.
no_of_norm_mat : int
Normalized covariance matrices may have an eigenvalue
of 0 due to the last data point not being linearly
independent. To allow for this, the user should input
the number of normalized matrices that are being treated
in an instance. For example, if a single covariance matrix
of a normalized distribution is being processed, the input
would be 1. If a covariance matrix contains pertains to
3 normalized datasets (i.e. cross covmat for 3
distributions), the input would be 3. The default value is
0 for when the covariance matrix pertains to an absolute
distribution.
Returns
-------
artunc : list
A two dimensional matrix (given as a list of lists)
which contains artificial uncertainties to be added
to the commondata. i^th row (or list) contains the
artificial uncertainties of the i^th data point.
"""
epsilon = -0.0000000001
neg_eval_count = 0
psd_check = True
covmat = np.zeros((ndata, ndata))
artunc = np.zeros((ndata, ndata))
for i in range(len(covmat_list)):
a = i // ndata
b = i % ndata
covmat[a][b] = covmat_list[i]
eigval, eigvec = eig(covmat)
for j in range(len(eigval)):
if eigval[j] < epsilon:
psd_check = False
elif eigval[j] > epsilon and eigval[j] <= 0:
neg_eval_count = neg_eval_count + 1
if neg_eval_count == (no_of_norm_mat + 1):
psd_check = False
elif eigval[j] > 0:
continue
if psd_check == False:
raise ValueError('The covariance matrix is not positive-semidefinite')
else:
for i in range(ndata):
for j in range(ndata):
if eigval[j] < 0:
continue
else:
artunc[i][j] = eigvec[i][j] * np.sqrt(eigval[j])
return artunc.tolist()


def compute_covmat(corrmat: np.ndarray, df: pd.DataFrame, ndata: int) -> list:
"""Compute the covariance matrix with the artificial stat uncertanties."""
# multiply by stat err
stat = df["stat"]
cov_mat = np.einsum("i,ij,j->ij", stat, corrmat, stat)
return covmat_to_artunc(ndata, cov_mat.flatten().tolist())


def write_data(df):
data_central = []
for i in range(len(df["G"])):
@@ -153,11 +75,9 @@ def write_data(df):
# Extract the correlation matrix and compute artificial systematics
ndata_points = len(data_central)
corrmatrix = read_corrmatrix(nb_datapoints=ndata_points)
- # Compute the covariance matrix
- compute_covmat(corrmatrix, df, ndata_points)

# Compute the covariance matrix
- art_sys = compute_covmat(corrmatrix, df, ndata_points)
+ art_sys = compute_covmat(corrmatrix, df['stat'], ndata_points)

error = []
for i in range(ndata_points):
@@ -48,4 +48,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABE06_NC_3GEV_EN_G1
- operation: "null"
+ - - JLABE06_NC_3GEV_EN_F1
+ operation: "ratio"
@@ -49,4 +49,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABE99_NC_3GEV_EN_G1
- operation: "null"
+ - - JLABE99_NC_3GEV_EN_F1
+ operation: "ratio"
@@ -528,4 +528,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1B_NC_NOTFIXED_ED_G1
- operation: "null"
+ - - JLABEG1B_NC_NOTFIXED_ED_F1
+ operation: "ratio"
@@ -188,4 +188,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1B_NC_NOTFIXED_EP_G1
- operation: "null"
+ - - JLABEG1B_NC_NOTFIXED_EP_F1
+ operation: "ratio"
@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1DVCS_NC_3GEV_EP_G1
- operation: "null"
+ - - JLABEG1DVCS_NC_3GEV_EP_F1
+ operation: "ratio"
@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1DVCS_NC_5GEV_ED_G1
- operation: "null"
+ - - JLABEG1DVCS_NC_5GEV_ED_F1
+ operation: "ratio"