pca_tables

NNPDF · Jun 21, 2023 · 845eccb · 845eccb
1 parent f2fd1b7
commit 845eccb
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 0 deletions.
diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py
@@ -927,6 +927,23 @@ def dataset_fits_bias_replicas_variance_samples_pdf_covmat(
     return biases, np.concatenate(variances), N_eig
 
 
+@check_multifit_replicas
+def fits_dataset_bias_variance_pdf_covmat_expected(
+    internal_multiclosure_dataset_loader,
+    _internal_max_reps=None,
+    _internal_min_reps=20,
+):
+    """Compute the mean bias and mean variance of a dataset across fits.
+
+    Thin wrapper around
+    ``dataset_fits_bias_replicas_variance_samples_pdf_covmat``: the bias and
+    variance samples it returns are each reduced to a single number with
+    ``np.mean``.
+
+    Parameters
+    ----------
+    internal_multiclosure_dataset_loader
+        Forwarded unchanged to the underlying sampler.
+    _internal_max_reps, _internal_min_reps
+        Forwarded unchanged to the underlying sampler, so that non-default
+        values are honoured instead of being replaced by ``None`` / ``20``.
+
+    Returns
+    -------
+    tuple
+        ``(mean_bias, mean_variance, N_eig)`` where ``N_eig`` is passed
+        through exactly as returned by the underlying sampler.
+    """
+    biases, variances, N_eig = dataset_fits_bias_replicas_variance_samples_pdf_covmat(
+        internal_multiclosure_dataset_loader,
+        # Forward the caller's settings; hard-coding the defaults here would
+        # silently ignore whatever was passed to this function.
+        _internal_max_reps=_internal_max_reps,
+        _internal_min_reps=_internal_min_reps,
+    )
+
+    return np.mean(biases), np.mean(variances), N_eig
+
+
+# Collected over ("data",): presumably yields one
+# (mean bias, mean variance, N_eig) tuple per dataset - confirm against
+# reportengine's ``collect`` semantics.
+datasets_expected_bias_variance_pdf_covmat = collect(
+    "fits_dataset_bias_variance_pdf_covmat_expected",
+    ("data",),
+)
+
 def dataset_inputs_fits_bias_replicas_variance_samples(
     internal_multiclosure_data_loader,
     _internal_max_reps=None,

diff --git a/validphys2/src/validphys/closuretest/multiclosure_output.py b/validphys2/src/validphys/closuretest/multiclosure_output.py
@@ -215,6 +215,30 @@ def total_bias_variance_ratio(
     return res
 
 
+@table
+def datasets_bias_variance_pdf_covmat(datasets_expected_bias_variance_pdf_covmat, each_dataset):
+    """For each dataset tabulate the expected bias and expected variance
+    across fits.
+
+    Each element of ``datasets_expected_bias_variance_pdf_covmat`` is unpacked
+    as a ``(bias, variance, ndata)`` triple and paired positionally with the
+    corresponding entry of ``each_dataset``. No arithmetic is done here: the
+    values are tabulated as received. According to the original description,
+    bias and variance are normalized by the number of data points.
+
+    Returns
+    -------
+    pd.DataFrame
+        Indexed by ``str(dataset)``, with columns ``ndata``, ``bias`` and
+        ``variance``.
+
+    Notes
+    -----
+
+    This is to check the weight each dataset/process has in the calculation
+    of the complete R_bv ratio. One dataset alone could have a correct
+    B/V = 1, but if bias and variance are both centered around a number >> 1,
+    then in the calculation of the total B/V ratio that specific
+    dataset/process is going to have much more weight than the rest.
+
+    NOTE(review): the third element of each triple is returned by
+    ``fits_dataset_bias_variance_pdf_covmat_expected`` under the name
+    ``N_eig``; confirm that labelling it ``ndata`` is intended.
+
+    """
+    records = [
+        dict(dataset=str(ds), ndata=ndata, bias=bias, variance=var)
+        for ds, (bias, var, ndata) in zip(
+            each_dataset, datasets_expected_bias_variance_pdf_covmat
+        )
+    ]
+    # ``index="dataset"`` moves that field into the index, so the remaining
+    # columns are already ndata, bias, variance - no renaming is needed.
+    return pd.DataFrame.from_records(
+        records, index="dataset", columns=("dataset", "ndata", "bias", "variance")
+    )
+
+
 @table
 def expected_xi_from_bias_variance(sqrt_experiments_bias_variance_ratio):
     """Given the ``sqrt_experiments_bias_variance_ratio`` calculate a predicted