NNPDF · comane · Dec 23, 2022 · Feb 27, 2023 · Feb 27, 2023 · Feb 27, 2023
diff --git a/validphys2/src/validphys/closuretest/__init__.py b/validphys2/src/validphys/closuretest/__init__.py
@@ -11,3 +11,4 @@
 from validphys.closuretest.multiclosure_pdf_output import *
 from validphys.closuretest.multiclosure_preprocessing import *
 from validphys.closuretest.multiclosure_pseudodata import *
+from validphys.closuretest.inconsistent_plots import *
diff --git a/validphys2/src/validphys/closuretest/closure_results.py b/validphys2/src/validphys/closuretest/closure_results.py
@@ -10,6 +10,7 @@
 
 from reportengine import collect
 from reportengine.table import table
+from reportengine.figure import figure, figuregen
 
 from validphys.calcutils import calc_chi2, bootstrap_values
 from validphys.checks import check_pdf_is_montecarlo
@@ -20,7 +21,9 @@
     check_fits_same_filterseed,
     check_fits_underlying_law_match,
 )
-
+from validphys import plotutils
+from validphys.inconsistent_ct import InconsistentCommonData
+from validphys.covmats import dataset_inputs_covmat_from_systematics
 
 BiasData = namedtuple("BiasData", ("bias", "ndata"))
 
@@ -394,3 +397,64 @@ def fit_underlying_pdfs_summary(fit, fitunderlyinglaw):
 def summarise_closure_underlying_pdfs(fits_underlying_pdfs_summary):
     """Collects the underlying pdfs for all fits and concatenates them into a single table"""
     return pd.concat(fits_underlying_pdfs_summary, axis=1)
+
+
+@table
+def covmat_diffs(data, inconsistent_datasets, sys_rescaling_factor):
+    """Tabulate the percentage change of the covmat trace caused by rescaled systematics.
+
+    The reference is the trace of the covariance matrix built from the unmodified
+    commondata. For each dataset in ``inconsistent_datasets`` and each of the four
+    uncertainty types labelled in the code below, the commondata are re-processed
+    with ``sys_rescaling_factor`` and ``(trace - modified_trace) / trace * 100``
+    is stored.
+
+    Parameters
+    ----------
+    data
+        Object providing ``dsinputs`` and ``load_commondata_instance()``.
+    inconsistent_datasets
+        Iterable of the datasets in which the inconsistency is introduced; each
+        one becomes a key of the result.
+    sys_rescaling_factor
+        Rescaling factor forwarded to ``process_commondata``.
+
+    Returns
+    -------
+    pandas.DataFrame
+        Built from a dict keyed by dataset, whose values map the labels "A/C",
+        "A/U", "M/C" and "M/U" to the percentages. Empty when
+        ``inconsistent_datasets`` is empty.
+    """
+    dataset_input_list = list(data.dsinputs)
+    commondata_wc = [
+        InconsistentCommonData(
+            setname=cd.setname, ndata=cd.ndata, commondataproc=cd.commondataproc,
+            nkin=cd.nkin, nsys=cd.nsys, commondata_table=cd.commondata_table,
+            systype_table=cd.systype_table)
+        for cd in data.load_commondata_instance()
+    ]
+    # Same covmat settings for the reference and for every modified matrix.
+    covmat_kwargs = dict(use_weights_in_covmat=False, norm_threshold=None,
+                         _list_of_central_values=None, _only_additive=False)
+    trace = np.trace(
+        dataset_inputs_covmat_from_systematics(commondata_wc, dataset_input_list, **covmat_kwargs))
+
+    # First four arguments of process_commondata; per the labels they select
+    # ADD/CORR, ADD/UNCORR, MULT/CORR and MULT/UNCORR respectively.
+    entries_dict = {"A/C": [True, False, True, False], "A/U": [True, False, False, True],
+                    "M/C": [False, True, True, False], "M/U": [False, True, False, True]}
+    impact_dict = {}
+    for inconsist_ds in inconsistent_datasets:
+        cov_dict = {}
+        for entry, flags in entries_dict.items():
+            # NOTE(review): mod_covmat_trace in inconsistent_plots.py passes an extra SPECIAL
+            # flag to process_commondata; confirm this six-argument call matches its signature.
+            commondata_wc_temp = [cd.process_commondata(*flags, inconsist_ds, sys_rescaling_factor)
+                                  for cd in commondata_wc]
+            modified_covmat = dataset_inputs_covmat_from_systematics(
+                commondata_wc_temp, dataset_input_list, **covmat_kwargs)
+            cov_dict[entry] = (trace - np.trace(modified_covmat)) / trace * 100
+        impact_dict[inconsist_ds] = cov_dict
+    # Built once after the loop: an empty dataset list used to leave `df` unbound.
+    return pd.DataFrame.from_records(impact_dict)
diff --git a/validphys2/src/validphys/closuretest/inconsistent_plots.py b/validphys2/src/validphys/closuretest/inconsistent_plots.py
@@ -0,0 +1,107 @@
+"""
+closuretest/inconsistent_plots.py
+
+Useful plots for analysis of inconsistent closure tests
+"""
+from reportengine.figure import figure
+from reportengine.table import table
+from validphys import plotutils
+from validphys.inconsistent_ct import InconsistentCommonData
+from validphys.covmats import dataset_inputs_covmat_from_systematics
+import numpy as np
+import pandas as pd
+
+def covmat_trace(dataset_input_list, commondata_wc):
+    """Return the trace of the experimental covariance matrix.
+
+    The matrix comes from ``dataset_inputs_covmat_from_systematics`` called with
+    ``use_weights_in_covmat=False``, ``norm_threshold=None``,
+    ``_list_of_central_values=None`` and ``_only_additive=False``.
+    """
+    covmat_options = dict(use_weights_in_covmat=False, norm_threshold=None,
+                          _list_of_central_values=None, _only_additive=False)
+    experimental_covmat = dataset_inputs_covmat_from_systematics(
+        commondata_wc, dataset_input_list, **covmat_options)
+    return np.trace(experimental_covmat)
+
+def mod_covmat_trace(dataset_input_list, commondata_wc, inconsistent_datasets, ADD, MULT, CORR, UNCORR, SPECIAL, sys_rescaling_factor):
+    """Return the trace of the covariance matrix built from rescaled commondata.
+
+    Each element of ``commondata_wc`` is run through its ``process_commondata``
+    method with the ADD/MULT and CORR/UNCORR/SPECIAL flags,
+    ``inconsistent_datasets`` and ``sys_rescaling_factor``. The results are given
+    to ``dataset_inputs_covmat_from_systematics`` together with ``dataset_input_list``.
+    """
+    rescaled_commondata = []
+    for cd in commondata_wc:
+        rescaled_cd = cd.process_commondata(
+            ADD, MULT, CORR, UNCORR, SPECIAL, inconsistent_datasets, sys_rescaling_factor)
+        rescaled_commondata.append(rescaled_cd)
+    covmat_options = dict(use_weights_in_covmat=False, norm_threshold=None,
+                          _list_of_central_values=None, _only_additive=False)
+    rescaled_covmat = dataset_inputs_covmat_from_systematics(
+        rescaled_commondata, dataset_input_list, **covmat_options)
+    return np.trace(rescaled_covmat)
+
+@figure
+def plot_trace_impact(data, inconsistent_datasets, ADD, MULT, CORR, UNCORR, SPECIAL):
+    """Plot the percentage covmat-trace ratio against the systematics rescaling factor.
+
+    The factor is scanned over ``np.arange(0, 3, 0.02)``. Below 1 the ratio is
+    modified/unmodified trace, from 1 upwards unmodified/modified, both times
+    100. Every tenth scanned factor below 1 is marked ("lambda type 2" in the
+    legend) together with the interpolated points at which the curve crosses
+    that same ratio ("lambda type1"). A marked ratio that the curve does not
+    cross inside the scanned range gets no "lambda type1" marker.
+
+    ``inconsistent_datasets`` and the ADD/MULT/CORR/UNCORR/SPECIAL flags are forwarded
+    to ``mod_covmat_trace`` and shown in the title. Returns the figure.
+    """
+    # Load the data once here rather than on every step of the scan.
+    dataset_input_list = list(data.dsinputs)
+    commondata_wc = data.load_commondata_instance()
+    commondata_wc_ic = [
+        InconsistentCommonData(
+            setname=cd.setname, ndata=cd.ndata, commondataproc=cd.commondataproc,
+            nkin=cd.nkin, nsys=cd.nsys, commondata_table=cd.commondata_table,
+            systype_table=cd.systype_table)
+        for cd in commondata_wc
+    ]
+    normal_trace = covmat_trace(dataset_input_list, commondata_wc)
+    lam_factors = np.arange(0, 3, 0.02)
+    ratios = []
+    points = []
+    fig, ax = plotutils.subplots()
+    for i, lam in enumerate(lam_factors):
+        mod_trace = mod_covmat_trace(dataset_input_list, commondata_wc_ic, inconsistent_datasets,
+                                     ADD, MULT, CORR, UNCORR, SPECIAL, lam)
+        ratios.append((mod_trace / normal_trace if lam < 1 else normal_trace / mod_trace) * 100)
+        if i % 10 == 0 and lam < 1:
+            ax.plot(lam, ratios[-1], marker=".", markersize=10,
+                    label="lambda type 2: " + str(round(lam, 3)) + "; ratio: " + str(round(ratios[-1], 3)))
+            points.append(ratios[-1])
+    for point in points:
+        a, b = find_intersections(np.asarray(lam_factors), np.asarray(ratios), point)
+        if a.size == 0:
+            # No crossing in the scanned range; a[0] below would raise IndexError.
+            continue
+        ax.plot(a, b, marker=".", markersize=10,
+                label="lambda type1: " + str(round(a[0], 3)) + "; ratio: " + str(round(b[0], 3)))
+    type_a_m = (" ADD " if ADD else "") + (" MULT " if MULT else "")
+    type_c_u_s = (" CORR " if CORR else "") + (" UNCORR " if UNCORR else "") + (" SPECIAL" if SPECIAL else "")
+    ax.plot(lam_factors, ratios, label="percentage ratios")
+    title = ("Impact of inconsistency of type " + type_a_m + " and " + type_c_u_s
+             + " in \n" + str(inconsistent_datasets) + " wrt all ds. \n")
+    ax.legend()
+    ax.set_title(title)
+    ax.set_xlabel("rescaling factor")
+    ax.set_ylabel("percentage ratio")
+    return fig
+
+def find_intersections(x, y, C):
+    """Return ``(xs, ys)``: where the sampled curve ``y(x)`` strictly crosses the level ``C``.
+
+    ``xs`` is linearly interpolated between bracketing samples; ``ys`` is ``C`` repeated."""
+    crossing = (y[1:] - C) * (y[:-1] - C) < 0.0  # sign flip between neighbouring samples
+    (lo,) = np.nonzero(crossing)
+    return x[lo] + (C - y[lo]) / (y[lo + 1] - y[lo]) * (x[lo + 1] - x[lo]), C * np.ones(len(lo))