Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
comane committed Jun 20, 2023
1 parent 571682c commit f2fd1b7
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,24 @@
"dataset_fits_bias_replicas_variance_samples", ("dataspecs",)
)

multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat = collect(
"dataset_fits_bias_replicas_variance_samples_pdf_covmat", ("dataspecs",)
)

multi_fits_bootstrap_dataset_bias_variance = collect(
"fits_bootstrap_dataset_bias_variance", ("dataspecs",)
)

multi_bias_variance_resampling_dataset = collect("bias_variance_resampling_dataset", ("dataspecs",))



## PDF SPACE
multi_fits_pdf_total_bias_variance = collect(
"fits_pdf_total_bias_variance", ("dataspecs", )
)


def dataset_replica_minus_central(internal_multiclosure_dataset_loader):
"""For a given dataset calculate the difference between theory prediction
of each replica and central replica.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
estimators in the space of data
"""
import numpy as np

from reportengine.figure import figure
from validphys import plotutils
Expand Down Expand Up @@ -39,7 +40,7 @@ def plot_replica_central_diff_multi(dataset_replica_minus_central_multi, dataspe


@figure
def plot_variance_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples, dataspecs):
def plot_variance_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs):
"""
histogram of the distribution of the variances (k) defined as the
distance between central replica and replica (k) in units of the
Expand All @@ -49,18 +50,18 @@ def plot_variance_distribution_multi(multi_dataset_fits_bias_replicas_variance_s
"""
fig, ax = plotutils.subplots()
for (_, variance_dist, _), spec in zip(
multi_dataset_fits_bias_replicas_variance_samples, dataspecs
multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs
):
label = spec['speclabel']

ax.hist(variance_dist, bins='auto', density=True, alpha=0.5, label=label)
ax.hist(variance_dist, bins='auto', density=True, alpha=0.5, label=label+f"variance={np.mean(variance_dist)}")

ax.legend()
return fig


@figure
def plot_bias_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples, dataspecs):
def plot_bias_distribution_multi(multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs):
"""
histogram of the distribution of the biases (l) defined as the
distance between central replica and underlying law in units of the
Expand All @@ -70,15 +71,50 @@ def plot_bias_distribution_multi(multi_dataset_fits_bias_replicas_variance_sampl
"""
fig, ax = plotutils.subplots()
for (bias_dist, _, _), spec in zip(
multi_dataset_fits_bias_replicas_variance_samples, dataspecs
multi_dataset_fits_bias_replicas_variance_samples_pdf_covmat, dataspecs
):
label = spec['speclabel']

ax.hist(bias_dist, bins='auto', density=True, alpha=0.5, label=label)
ax.hist(bias_dist, bins='auto', density=True, alpha=0.5, label=label+f"bias = {np.mean(bias_dist)}")

ax.legend()
return fig


@figure
def plot_pdf_space_variance_distribution_multi(multi_fits_pdf_total_bias_variance, dataspecs):
"""
"""
fig, ax = plotutils.subplots()
for (_, variance_dist), spec in zip(
multi_fits_pdf_total_bias_variance, dataspecs
):
label = spec['speclabel']

variance_dist = np.concatenate(variance_dist, axis=0)
ax.hist(variance_dist, bins='auto', density=True, alpha=0.5, label = label + f" Variance Distribution, mean = {np.mean(variance_dist):.3f}")

ax.legend()
return fig

@figure
def plot_pdf_space_bias_distribution_multi(multi_fits_pdf_total_bias_variance, dataspecs):
"""
"""
fig, ax = plotutils.subplots()
for (bias_dist, _), spec in zip(
multi_fits_pdf_total_bias_variance, dataspecs
):
label = spec['speclabel']

ax.hist(bias_dist, bins='auto', density=True, alpha=0.5, label = label + f" Bias Distribution, mean = {np.mean(bias_dist):.3f}")

ax.legend()
return fig



@figure
def plot_multi_bias_distribution_bootstrap(multi_fits_bootstrap_dataset_bias_variance, dataspecs):
Expand Down
71 changes: 71 additions & 0 deletions validphys2/src/validphys/closuretest/multiclosure.py
Original file line number Diff line number Diff line change
Expand Up @@ -856,6 +856,77 @@ def dataset_fits_bias_replicas_variance_samples(
return biases, np.concatenate(variances), len(law_th)


def dataset_fits_bias_replicas_variance_samples_pdf_covmat(
internal_multiclosure_dataset_loader,
_internal_max_reps=None,
_internal_min_reps=20,
):
"""
like `dataset_fits_bias_replicas_variance_samples` but using a regularized
PDF covmat for computation of variance and bias
"""
from validphys import covmats
closures_th, law_th, _, sqrtcov = internal_multiclosure_dataset_loader
# The dimentions here are (fit, data point, replica)
reps = np.asarray([th.error_members[:, :_internal_max_reps] for th in closures_th])
# take mean across replicas - since we might have changed no. of reps
centrals = reps.mean(axis=2)

# loop over fits to determine the number of eigenvalues of the covariance matrix to keep
N_eigenvalues = []
for i in range(reps.shape[0]):
pdf_covmat = np.cov(reps[i,:,:])

# Diagonalize the matrix and sort the eigenvalues and eigenvectors from largest to smallest
L, W = np.linalg.eig(pdf_covmat)
idx = L.argsort()[::-1]
L = L[idx]
W = W[:,idx]

# select nr of eigenvalues based on explained variance
perc_var = 0
N_eig = 0

for eig in L:
N_eig+=1
perc_var += np.real(eig) / np.real(np.sum(L))

if perc_var > 0.99:
N_eigenvalues.append(N_eig)
break

N_eig = np.min(N_eigenvalues)

biases = []
variances = []

# loop over fits to compute variances and biases
for i in range(reps.shape[0]):
pdf_covmat = np.cov(reps[i,:,:])
L, W = np.linalg.eig(pdf_covmat)
idx = L.argsort()[::-1]
L = L[idx]
W = W[:,idx]

# Keep only the N_eig largest eigenvectors
Wtilde = W[:, :N_eig]

# Transform initial covariance matrix
pdf_covmat_pca = np.einsum("ij,jk->ik", np.einsum("ij,ik->jk",Wtilde, pdf_covmat), Wtilde).real
sqrt_pdf_covmat_pca = covmats.sqrt_covmat(pdf_covmat_pca)

# transform data
diffs_variance = reps[i, :, :] - reps[i, :, :].mean(axis=1, keepdims=True)
diffs_variance_pca = diffs_variance.T @ Wtilde
diff_bias = law_th.central_value - centrals[i].T
diff_bias_pca = diff_bias.T @ Wtilde

variances.append(np.real(calc_chi2(sqrt_pdf_covmat_pca, diffs_variance_pca.T)))
biases.append(np.real(calc_chi2(sqrt_pdf_covmat_pca, diff_bias_pca)))

return biases, np.concatenate(variances), N_eig


def dataset_inputs_fits_bias_replicas_variance_samples(
internal_multiclosure_data_loader,
_internal_max_reps=None,
Expand Down

0 comments on commit f2fd1b7

Please sign in to comment.