Merge branch 'master' into check_newcd
Radonirinaunimi committed Jul 17, 2024
2 parents 5aa3c9d + e3f7086, commit b4d7b13
Showing 227 changed files with 29,135 additions and 638 deletions.
50 changes: 36 additions & 14 deletions n3fit/runcards/examples/nnpdf40-like.yml
@@ -2,7 +2,7 @@
# Configuration file for n3fit
#
################################################################################
- description: NNPDF4.0 NNLO baseline fit (nFONLL). Comparable to NNPDF40_nnlo_as_01180_qcd
+ description: NNLO baseline fit, NNPDF4.0 dataset

################################################################################
dataset_inputs:
@@ -91,7 +91,7 @@ dataset_inputs:

################################################################################
datacuts:
- t0pdfset: NNPDF40_nnlo_as_01180
+ t0pdfset: 240701-02-rs-nnpdf40-baseline
q2min: 3.49
w2min: 12.5

@@ -136,34 +136,56 @@ fitting:
fitbasis: EVOL
savepseudodata: True
basis:
- - {fl: sng, trainable: false, smallx: [1.091, 1.119], largex: [1.471, 3.021]}
- - {fl: g, trainable: false, smallx: [0.7795, 1.095], largex: [2.742, 5.547]}
- - {fl: v, trainable: false, smallx: [0.472, 0.7576], largex: [1.571, 3.559]}
- - {fl: v3, trainable: false, smallx: [0.07483, 0.4501], largex: [1.714, 3.467]}
- - {fl: v8, trainable: false, smallx: [0.5731, 0.779], largex: [1.555, 3.465]}
- - {fl: t3, trainable: false, smallx: [-0.5498, 1.0], largex: [1.778, 3.5]}
- - {fl: t8, trainable: false, smallx: [0.5469, 0.857], largex: [1.555, 3.391]}
- - {fl: t15, trainable: false, smallx: [1.081, 1.142], largex: [1.491, 3.092]}
+ - {fl: sng, trainable: false, smallx: [1.089, 1.119], largex: [1.475, 3.119]}
+ - {fl: g, trainable: false, smallx: [0.7504, 1.098], largex: [2.814, 5.669]}
+ - {fl: v, trainable: false, smallx: [0.479, 0.7384], largex: [1.549, 3.532]}
+ - {fl: v3, trainable: false, smallx: [0.1073, 0.4397], largex: [1.733, 3.458]}
+ - {fl: v8, trainable: false, smallx: [0.5507, 0.7837], largex: [1.516, 3.356]}
+ - {fl: t3, trainable: false, smallx: [-0.4506, 0.9305], largex: [1.745, 3.424]}
+ - {fl: t8, trainable: false, smallx: [0.5877, 0.8687], largex: [1.522, 3.515]}
+ - {fl: t15, trainable: false, smallx: [1.089, 1.141], largex: [1.492, 3.222]}

################################################################################
positivity:
posdatasets:
- - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6} # Positivity Lagrange Multiplier
+ # Positivity Lagrange Multiplier
+ - {dataset: NNPDF_POS_2P24GEV_F2U, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_F2D, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_F2S, maxlambda: 1e6}
- - {dataset: NNPDF_POS_2P24GEV_FLL-19PTS, maxlambda: 1e6}
+ - {dataset: NNPDF_POS_2P24GEV_FLL, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_DYU, maxlambda: 1e10}
- {dataset: NNPDF_POS_2P24GEV_DYD, maxlambda: 1e10}
- {dataset: NNPDF_POS_2P24GEV_DYS, maxlambda: 1e10}
- - {dataset: NNPDF_POS_2P24GEV_F2C-17PTS, maxlambda: 1e6}
- - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6} # Positivity of MSbar PDFs
+ - {dataset: NNPDF_POS_2P24GEV_F2C, maxlambda: 1e6}
+ # Positivity of MSbar PDFs
+ - {dataset: NNPDF_POS_2P24GEV_XUQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XUB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XDQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XDB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XSQ, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XSB, maxlambda: 1e6}
- {dataset: NNPDF_POS_2P24GEV_XGL, maxlambda: 1e6}

+ added_filter_rules:
+ - dataset: NNPDF_POS_2P24GEV_FLL
+   rule: "x > 5.0e-7"
+ - dataset: NNPDF_POS_2P24GEV_F2C
+   rule: "x < 0.74"
+ - dataset: NNPDF_POS_2P24GEV_XGL
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XUQ
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XUB
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XDQ
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XDB
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XSQ
+   rule: "x > 0.1"
+ - dataset: NNPDF_POS_2P24GEV_XSB
+   rule: "x > 0.1"

integrability:
integdatasets:
- {dataset: NNPDF_INTEG_3GEV_XT8, maxlambda: 1e2}
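
The added_filter_rules block above applies per-point kinematic cuts: each rule is a boolean expression evaluated on every data point of the named dataset, and points that fail it are cut. A minimal sketch of the idea, with toy points invented for illustration (this is not the actual validphys rule engine):

points = [{"x": 1.0e-8}, {"x": 1.0e-3}, {"x": 0.2}]
rule = "x > 5.0e-7"  # same syntax as the FLL rule above

# Keep only the points for which the expression evaluates to True.
kept = [p for p in points if eval(rule, {}, p)]
print(kept)  # -> [{'x': 0.001}, {'x': 0.2}]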
90 changes: 90 additions & 0 deletions nnpdf_data/nnpdf_data/filter_utils/correlations.py
@@ -0,0 +1,90 @@
import numpy as np
from numpy.linalg import eig


def upper_triangular_to_symmetric(ut, dim):
"""Build a symmetric matrix from the upper diagonal"""
corr = np.zeros((dim, dim))
last = dim
first = 0
for i in range(dim):
corr[i, i:] = ut[first:last]
last += dim - i - 1
first += dim - i
# Mirror the upper triangle so the result is symmetric, as the docstring states
return corr + corr.T - np.diag(np.diag(corr))


def compute_covmat(corrmat: np.ndarray, unc: np.ndarray, ndata: int) -> list:
"""Compute the covariance matrix with the artificial stat uncertainties."""
# multiply by stat err
cov_mat = np.einsum("i,ij,j->ij", unc, corrmat, unc)
return covmat_to_artunc(ndata, cov_mat.flatten().tolist())


def covmat_to_artunc(ndata, covmat_list, no_of_norm_mat=0):
r"""Convert the covariance matrix to a matrix of
artificial uncertainties.
NOTE: This function has been taken from validphys.newcommondata_utils.
If those utils get merged in the future, we can replace this.
Parameters
----------
ndata : integer
Number of data points
covmat_list : list
A one dimensional list which contains the elements of
the covariance matrix row by row. Since experimental
datasets provide these matrices in a list form, this
simplifies the implementation for the user.
no_of_norm_mat : int
Normalized covariance matrices may have an eigenvalue
of 0 due to the last data point not being linearly
independent. To allow for this, the user should input
the number of normalized matrices that are being treated
in an instance. For example, if a single covariance matrix
of a normalized distribution is being processed, the input
would be 1. If a covariance matrix pertains to
3 normalized datasets (i.e. cross covmat for 3
distributions), the input would be 3. The default value is
0 for when the covariance matrix pertains to an absolute
distribution.
Returns
-------
artunc : list
A two dimensional matrix (given as a list of lists)
which contains artificial uncertainties to be added
to the commondata. i^th row (or list) contains the
artificial uncertainties of the i^th data point.
"""
epsilon = -1e-10
neg_eval_count = 0
psd_check = True
covmat = np.zeros((ndata, ndata))
artunc = np.zeros((ndata, ndata))
for i in range(len(covmat_list)):
a = i // ndata
b = i % ndata
covmat[a][b] = covmat_list[i]
eigval, eigvec = eig(covmat)
for j in range(len(eigval)):
if eigval[j] < epsilon:
psd_check = False
elif eigval[j] > epsilon and eigval[j] <= 0:
neg_eval_count = neg_eval_count + 1
if neg_eval_count == (no_of_norm_mat + 1):
psd_check = False
elif eigval[j] > 0:
continue
if not psd_check:
raise ValueError("The covariance matrix is not positive-semidefinite")
else:
for i in range(ndata):
for j in range(ndata):
if eigval[j] < 0:
continue
else:
artunc[i][j] = eigvec[i][j] * np.sqrt(eigval[j])
return artunc.tolist()
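
A minimal usage sketch for these helpers on a toy three-point case; all numbers are invented for illustration, and it assumes upper_triangular_to_symmetric returns the fully mirrored matrix:

import numpy as np

from nnpdf_data.filter_utils.correlations import (
    compute_covmat,
    upper_triangular_to_symmetric,
)

ndata = 3
# Upper triangle of a correlation matrix, row by row, diagonal included.
ut = [1.0, 0.2, 0.1, 1.0, 0.3, 1.0]
corr = upper_triangular_to_symmetric(ut, ndata)

stat = np.array([0.5, 0.4, 0.6])  # statistical uncertainty of each point
art_unc = np.array(compute_covmat(corr, stat, ndata))

# Row i of art_unc holds the artificial systematics of point i; together
# they reproduce the covariance matrix C_ij = stat_i * corr_ij * stat_j.
cov = np.einsum("i,ij,j->ij", stat, corr, stat)
assert np.allclose(art_unc @ art_unc.T, cov)

Since covmat_to_artunc keeps only non-negative eigendirections, this reconstruction is exact for a positive-definite covariance matrix.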
27 changes: 27 additions & 0 deletions nnpdf_data/nnpdf_data/filter_utils/uncertainties.py
@@ -0,0 +1,27 @@

import numpy as np

def symmetrize_errors(delta_plus, delta_minus):
r"""Compute the symmetrized uncertainty and the shift in data point.
Parameters
----------
delta_plus : float
The top/plus uncertainty with sign
delta_minus : float
The bottom/minus uncertainty with sign
Returns
-------
se_delta : float
The value to be added to the data point
se_sigma : float
The symmetrized uncertainty to be used in commondata
"""
semi_diff = (delta_plus + delta_minus) / 2
average = (delta_plus - delta_minus) / 2
se_delta = semi_diff
se_sigma = np.sqrt(average * average + 2 * semi_diff * semi_diff)
return se_delta, se_sigma
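
A short usage sketch with toy numbers: for an asymmetric measurement such as 5.0 +0.3/-0.5, the helper returns the shift of the central value and the symmetrized error.

from nnpdf_data.filter_utils.uncertainties import symmetrize_errors

# Signs as provided by the experiment: plus error positive, minus error negative.
shift, sigma = symmetrize_errors(delta_plus=0.3, delta_minus=-0.5)

central = 5.0 + shift  # the data point moves by the semi-difference, here -0.1
print(f"{central:.3f} +/- {sigma:.3f}")  # -> 4.900 +/- 0.424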

@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - E155_NC_9GEV_EN_G1
- operation: 'null'
+ - - E155_NC_9GEV_EN_F1
+ operation: 'ratio'
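
With this change the observable becomes the ratio g1/F1 rather than g1 alone: the second FK-table group is added and the operation switches from 'null' to 'ratio'. The resulting theory block, reconstructed from the hunk above assuming the file's standard two-space indentation, reads:

theory:
  FK_tables:
  - - E155_NC_9GEV_EN_G1
  - - E155_NC_9GEV_EN_F1
  operation: 'ratio'

The same switch is applied in the remaining polarised-DIS metadata files of this commit.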
@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - E155_NC_9GEV_EP_G1
- operation: 'null'
+ - - E155_NC_9GEV_EP_F1
+ operation: 'ratio'
@@ -9,8 +9,7 @@
HERE = pathlib.Path(__file__).parent
sys.path = [str(HERE.parent / "HERMES_NC_7GEV_EP")] + sys.path

- from filter import compute_covmat
-
+ from nnpdf_data.filter_utils.correlations import compute_covmat

def read_data(fnames):
df = pd.DataFrame()
@@ -81,11 +80,9 @@ def write_data(df):
# Extract the correlation matrix and compute artificial systematics
ndata_points = len(data_central)
corrmatrix = read_corrmatrix(nb_datapoints=ndata_points)
- # Compute the covariance matrix
- compute_covmat(corrmatrix, df, ndata_points)

# Compute the covariance matrix
- art_sys = compute_covmat(corrmatrix, df, ndata_points)
+ art_sys = compute_covmat(corrmatrix, df['stat'], ndata_points)

error = []
for i in range(ndata_points):
84 changes: 2 additions & 82 deletions nnpdf_data/nnpdf_data/new_commondata/HERMES_NC_7GEV_EP/filter.py
@@ -2,10 +2,10 @@
import pathlib

import numpy as np
- from numpy.linalg import eig
import pandas as pd
import yaml

+ from nnpdf_data.filter_utils.correlations import compute_covmat

def read_data(fnames):
df = pd.DataFrame()
@@ -49,84 +49,6 @@ def read_corrmatrix(nb_datapoints: int = 15) -> np.ndarray:

return df_corrs.value.values.reshape((nb_datapoints, nb_datapoints))


def covmat_to_artunc(ndata, covmat_list, no_of_norm_mat=0):
r"""Convert the covariance matrix to a matrix of
artificial uncertainties.
NOTE: This function has been taken from validphys.newcommondata_utils.
If those utils get merged in the future, we can replace this.
Parameters
----------
ndata : integer
Number of data points
covmat_list : list
A one dimensional list which contains the elements of
the covariance matrix row by row. Since experimental
datasets provide these matrices in a list form, this
simplifies the implementation for the user.
no_of_norm_mat : int
Normalized covariance matrices may have an eigenvalue
of 0 due to the last data point not being linearly
independent. To allow for this, the user should input
the number of normalized matrices that are being treated
in an instance. For example, if a single covariance matrix
of a normalized distribution is being processed, the input
would be 1. If a covariance matrix contains pertains to
3 normalized datasets (i.e. cross covmat for 3
distributions), the input would be 3. The default value is
0 for when the covariance matrix pertains to an absolute
distribution.
Returns
-------
artunc : list
A two dimensional matrix (given as a list of lists)
which contains artificial uncertainties to be added
to the commondata. i^th row (or list) contains the
artificial uncertainties of the i^th data point.
"""
epsilon = -0.0000000001
neg_eval_count = 0
psd_check = True
covmat = np.zeros((ndata, ndata))
artunc = np.zeros((ndata, ndata))
for i in range(len(covmat_list)):
a = i // ndata
b = i % ndata
covmat[a][b] = covmat_list[i]
eigval, eigvec = eig(covmat)
for j in range(len(eigval)):
if eigval[j] < epsilon:
psd_check = False
elif eigval[j] > epsilon and eigval[j] <= 0:
neg_eval_count = neg_eval_count + 1
if neg_eval_count == (no_of_norm_mat + 1):
psd_check = False
elif eigval[j] > 0:
continue
if psd_check == False:
raise ValueError('The covariance matrix is not positive-semidefinite')
else:
for i in range(ndata):
for j in range(ndata):
if eigval[j] < 0:
continue
else:
artunc[i][j] = eigvec[i][j] * np.sqrt(eigval[j])
return artunc.tolist()


def compute_covmat(corrmat: np.ndarray, df: pd.DataFrame, ndata: int) -> list:
"""Compute the covariance matrix with the artificial stat uncertanties."""
# multiply by stat err
stat = df["stat"]
cov_mat = np.einsum("i,ij,j->ij", stat, corrmat, stat)
return covmat_to_artunc(ndata, cov_mat.flatten().tolist())


def write_data(df):
data_central = []
for i in range(len(df["G"])):
@@ -153,11 +75,9 @@ def write_data(df):
# Extract the correlation matrix and compute artificial systematics
ndata_points = len(data_central)
corrmatrix = read_corrmatrix(nb_datapoints=ndata_points)
- # Compute the covariance matrix
- compute_covmat(corrmatrix, df, ndata_points)

# Compute the covariance matrix
- art_sys = compute_covmat(corrmatrix, df, ndata_points)
+ art_sys = compute_covmat(corrmatrix, df['stat'], ndata_points)

error = []
for i in range(ndata_points):
@@ -48,4 +48,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABE06_NC_3GEV_EN_G1
- operation: "null"
+ - - JLABE06_NC_3GEV_EN_F1
+ operation: "ratio"
@@ -49,4 +49,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABE99_NC_3GEV_EN_G1
- operation: "null"
+ - - JLABE99_NC_3GEV_EN_F1
+ operation: "ratio"
@@ -528,4 +528,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1B_NC_NOTFIXED_ED_G1
- operation: "null"
+ - - JLABEG1B_NC_NOTFIXED_ED_F1
+ operation: "ratio"
@@ -188,4 +188,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1B_NC_NOTFIXED_EP_G1
- operation: "null"
+ - - JLABEG1B_NC_NOTFIXED_EP_F1
+ operation: "ratio"
@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1DVCS_NC_3GEV_EP_G1
- operation: "null"
+ - - JLABEG1DVCS_NC_3GEV_EP_F1
+ operation: "ratio"
@@ -52,4 +52,5 @@ implemented_observables:
theory:
FK_tables:
- - JLABEG1DVCS_NC_5GEV_ED_G1
- operation: "null"
+ - - JLABEG1DVCS_NC_5GEV_ED_F1
+ operation: "ratio"