diff --git a/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent.py b/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent.py index 9221ddd321..0a5864236b 100644 --- a/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent.py +++ b/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent.py @@ -20,6 +20,10 @@ "dataset_fits_bias_replicas_variance_samples", ("dataspecs",) ) +multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat = collect( + "dataset_fits_bias_replicas_variance_samples_pdf_covmat", ("dataspecs",) +) + multi_fits_bootstrap_dataset_bias_variance = collect( "fits_bootstrap_dataset_bias_variance", ("dataspecs",) ) @@ -27,6 +31,13 @@ multi_bias_variance_resampling_dataset = collect("bias_variance_resampling_dataset", ("dataspecs",)) + +## PDF SPACE +multi_fits_pdf_total_bias_variance = collect( + "fits_pdf_total_bias_variance", ("dataspecs", ) +) + + def dataset_replica_minus_central(internal_multiclosure_dataset_loader): """For a given dataset calculate the difference between theory prediction of each replica and central replica. diff --git a/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent_output.py b/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent_output.py index 67b974c3c2..c5d2c10ada 100644 --- a/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent_output.py +++ b/validphys2/src/validphys/closuretest/inconsistent_closuretest/multiclosure_inconsistent_output.py @@ -6,6 +6,7 @@ estimators in the space of data """ +import numpy as np from reportengine.figure import figure from validphys import plotutils @@ -39,7 +40,7 @@ def plot_replica_central_diff_multi(dataset_replica_minus_central_multi, dataspe @figure -def plot_variance_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples, dataspecs): +def plot_variance_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs): """ histogram of the distribution of the variances (k) defined as the distance between central replica and replica (k) in units of the @@ -49,18 +50,18 @@ def plot_variance_distribution_multi(multi_dataset_fits_bias_replicas_variance_s """ fig, ax = plotutils.subplots() for (_, variance_dist, _), spec in zip( - multi_dataset_fits_bias_replicas_variance_samples, dataspecs + multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs ): label = spec['speclabel'] - - ax.hist(variance_dist, bins='auto', density=True, alpha=0.5, label=label) + + ax.hist(variance_dist, bins='auto', density=True, alpha=0.5, label=label+f"variance={np.mean(variance_dist)}") ax.legend() return fig @figure -def plot_bias_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples, dataspecs): +def plot_bias_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs): """ histogram of the distribution of the biases (l) defined as the distance between central replica and underlying law in units of the @@ -70,15 +71,50 @@ def plot_bias_distribution_multi(multi_dataset_fits_bias_replicas_variance_sampl """ fig, ax = plotutils.subplots() for (bias_dist, _, _), spec in zip( - multi_dataset_fits_bias_replicas_variance_samples, dataspecs + multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs ): label = spec['speclabel'] - ax.hist(bias_dist, bins='auto', density=True, alpha=0.5, label=label) + ax.hist(bias_dist, bins='auto', density=True, alpha=0.5, label=label+f"bias = {np.mean(bias_dist)}") + + ax.legend() + return fig + + +@figure +def plot_pdf_space_variance_distribution_multi(multi_fits_pdf_total_bias_variance, dataspecs): + """ + + """ + fig, ax = plotutils.subplots() + for (_, variance_dist), spec in zip( + multi_fits_pdf_total_bias_variance, dataspecs + ): + label = spec['speclabel'] + variance_dist = np.concatenate(variance_dist, axis=0) + ax.hist(variance_dist, bins='auto', density=True, alpha=0.5, label = label + f" Variance Distribution, mean = {np.mean(variance_dist):.3f}") + ax.legend() return fig +@figure +def plot_pdf_space_bias_distribution_multi(multi_fits_pdf_total_bias_variance, dataspecs): + """ + + """ + fig, ax = plotutils.subplots() + for (bias_dist, _), spec in zip( + multi_fits_pdf_total_bias_variance, dataspecs + ): + label = spec['speclabel'] + + ax.hist(bias_dist, bins='auto', density=True, alpha=0.5, label = label + f" Bias Distribution, mean = {np.mean(bias_dist):.3f}") + + ax.legend() + return fig + + @figure def plot_multi_bias_distribution_bootstrap(multi_fits_bootstrap_dataset_bias_variance, dataspecs): diff --git a/validphys2/src/validphys/closuretest/multiclosure.py b/validphys2/src/validphys/closuretest/multiclosure.py index 492a418e6e..b0b9b02341 100644 --- a/validphys2/src/validphys/closuretest/multiclosure.py +++ b/validphys2/src/validphys/closuretest/multiclosure.py @@ -856,6 +856,77 @@ def dataset_fits_bias_replicas_variance_samples( return biases, np.concatenate(variances), len(law_th) +def dataset_fits_bias_replicas_variance_samples_pdf_covmat( + internal_multiclosure_dataset_loader, + _internal_max_reps=None, + _internal_min_reps=20, +): + """ + like `dataset_fits_bias_replicas_variance_samples` but using a regularized + PDF covmat for computation of variance and bias + """ + from validphys import covmats + closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader + # The dimentions here are (fit, data point, replica) + reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th]) + # take mean across replicas - since we might have changed no. of reps + centrals = reps.mean(axis=2) + + # loop over fits to determine the number of eigenvalues of the covariance matrix to keep + N_eigenvalues = [] + for i in range(reps.shape[0]): + pdf_covmat = np.cov(reps[i,:,:]) + + # Diagonalize the matrix and sort the eigenvalues and eigenvectors from largest to smallest + L, W = np.linalg.eig(pdf_covmat) + idx = L.argsort()[::-1] + L = L[idx] + W = W[:,idx] + + # select nr of eigenvalues based on explained variance + perc_var = 0 + N_eig = 0 + + for eig in L: + N_eig+=1 + perc_var += np.real(eig) / np.real(np.sum(L)) + + if perc_var > 0.99: + N_eigenvalues.append(N_eig) + break + + N_eig = np.min(N_eigenvalues) + + biases = [] + variances = [] + + # loop over fits to compute variances and biases + for i in range(reps.shape[0]): + pdf_covmat = np.cov(reps[i,:,:]) + L, W = np.linalg.eig(pdf_covmat) + idx = L.argsort()[::-1] + L = L[idx] + W = W[:,idx] + + # Keep only the N_eig largest eigenvectors + Wtilde = W[:, :N_eig] + + # Transform initial covariance matrix + pdf_covmat_pca = np.einsum("ij,jk->ik", np.einsum("ij,ik->jk",Wtilde, pdf_covmat), Wtilde).real + sqrt_pdf_covmat_pca = covmats.sqrt_covmat(pdf_covmat_pca) + + # transform data + diffs_variance = reps[i, :, :] - reps[i, :, :].mean(axis=1, keepdims=True) + diffs_variance_pca = diffs_variance.T @ Wtilde + diff_bias = law_th.central_value - centrals[i].T + diff_bias_pca = diff_bias.T @ Wtilde + + variances.append(np.real(calc_chi2(sqrt_pdf_covmat_pca, diffs_variance_pca.T))) + biases.append(np.real(calc_chi2(sqrt_pdf_covmat_pca, diff_bias_pca))) + + return biases, np.concatenate(variances), N_eig + + def dataset_inputs_fits_bias_replicas_variance_samples( internal_multiclosure_data_loader, _internal_max_reps=None,