Skip to content

Commit

Permalink
Piazza pulita 2: removed principal_components_variance_distribution_d…
Browse files Browse the repository at this point in the history
…ataset as we are not using it
  • Loading branch information
comane committed Mar 17, 2024
1 parent 6ba487c commit bf843e1
Show file tree
Hide file tree
Showing 2 changed files with 0 additions and 100 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -126,35 +126,3 @@ def principal_components_bias_variance_dataset(
# Gather the output of the "principal_components_bias_variance_dataset"
# provider over the ("data",) specification, so that consumers receive a
# sequence with one entry per item.
# NOTE(review): `collect` is not defined in this view; presumably it is
# reportengine's `collect` -- confirm against the module imports.
principal_components_bias_variance_datasets = collect(
"principal_components_bias_variance_dataset", ("data",)
)


def principal_components_variance_distribution_dataset(
    internal_multiclosure_dataset_loader, principal_components_dataset
):
    """
    Compute, fit by fit, the chi2 of the centred theory replicas in the
    principal-component basis.

    Parameters
    ----------
    internal_multiclosure_dataset_loader: tuple
        Four-element tuple; only the first element is used. It is iterated
        and the ``error_members`` attribute of each item is read.
    principal_components_dataset: tuple
        Triple ``(pc_basis, pc_reps, n_comp)``.

    Returns
    -------
    tuple
        ``(variances, n_comp)``. ``variances`` is ``None`` when
        ``n_comp <= 1``; otherwise it is an array with one row per fit, each
        row wrapping the result of ``calc_chi2`` for that fit.

    Notes
    -----
    NOTE(review): the indexing below assumes the stacked ``error_members``
    form a 3-dimensional array whose first axis runs over fits and whose last
    axis runs over replicas -- confirm against the loader.
    """
    closure_theories, _, _, _ = internal_multiclosure_dataset_loader
    replicas = np.asarray([theory.error_members for theory in closure_theories])

    basis, projected_reps, n_comp = principal_components_dataset

    # With at most one principal component there is nothing to compute.
    if n_comp <= 1:
        return None, n_comp

    # estimate (PC) pdf covariance matrix (from replicas), shape is (Npc, Npc)
    pdf_covmat_sqrt = covmats.sqrt_covmat(np.cov(projected_reps))

    def _fit_chi2(fit_index):
        # Centre the replicas of this fit on their mean along axis 1 and
        # project the fluctuations onto the principal-component basis.
        fit_replicas = replicas[fit_index, :, :]
        centred = fit_replicas - fit_replicas.mean(axis=1, keepdims=True)
        return [calc_chi2(pdf_covmat_sqrt, basis @ centred)]

    per_fit = [_fit_chi2(fit_index) for fit_index in range(replicas.shape[0])]
    return np.asarray(per_fit), n_comp


# Gather the output of the "principal_components_variance_distribution_dataset"
# provider over the ("data",) specification, so that consumers receive a
# sequence with one ``(variances, n_comp)`` entry per item.
# NOTE(review): `collect` is not defined in this view; presumably it is
# reportengine's `collect` -- confirm against the module imports.
principal_components_variance_distribution_datasets = collect(
"principal_components_variance_distribution_dataset", ("data",)
)
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@

import pandas as pd
import numpy as np
from scipy import stats

from reportengine.figure import figuregen
from reportengine.table import table
Expand All @@ -21,73 +20,6 @@
)


@figuregen
def plot_bias_distribution_datasets(principal_components_bias_variance_datasets, each_dataset):
    """
    Yield one figure per dataset comparing the distribution of the biases
    with a chi2 distribution.

    For every ``(biases, variances, n_comp)`` entry the figure shows a
    normalised histogram of ``biases`` and the chi2 pdf with ``n_comp``
    degrees of freedom. The legend reports the Kolmogorov-Smirnov p-value of
    ``biases`` against that chi2 and ``sqrt(mean(biases) / mean(variances))``.

    If anything goes wrong while building the plot for a dataset, a
    placeholder figure carrying only the dataset label is yielded instead, so
    that exactly one figure is produced per dataset.

    Parameters
    ----------
    principal_components_bias_variance_datasets: iterable
        One ``(biases, variances, n_comp)`` triple per dataset.
    each_dataset: iterable
        The datasets, in the same order; ``str(ds)`` is used as the label.

    Yields
    ------
    The figure object returned by ``plotutils.subplots``.
    """
    for pc_bias_var_dataset, ds in zip(principal_components_bias_variance_datasets, each_dataset):
        biases, variances, n_comp = pc_bias_var_dataset

        try:
            sqrt_rbv = np.sqrt(np.mean(biases) / np.mean(variances))
            vals = np.linspace(0, 3 * n_comp, 100)
            chi2_dist = stats.chi2(df=n_comp)
            chi2_pdf = chi2_dist.pdf(vals)
            pvalue_ks = stats.kstest(biases, chi2_dist.cdf).pvalue
            fig, ax = plotutils.subplots()
            ax.hist(biases, density=True, bins='auto', label=f"Bias {str(ds)}, p={pvalue_ks:.3f}")
            ax.plot(vals, chi2_pdf, label=f"chi2, dof={n_comp}")
            # Empty line: only used to display sqrt(Rbv) in the legend.
            ax.plot([], [], 'ro', label=f"sqrt(Rbv) = {sqrt_rbv:.2f}")
            ax.legend()
        except Exception:
            # Best-effort fallback. `Exception` (rather than a bare except)
            # lets GeneratorExit, KeyboardInterrupt and SystemExit propagate.
            fig, ax = plotutils.subplots()
            ax.plot([], [], label=f"Dataset: {str(ds)}")
            ax.legend()

        # Yield outside the try block: an exception raised at the yield point
        # (e.g. when the generator is closed) must not trigger the fallback
        # and produce a second figure for the same dataset.
        yield fig


@figuregen
def plot_variance_distribution_datasets(
    principal_components_variance_distribution_datasets, each_dataset
):
    """
    Yield, for each dataset, one figure per fit comparing the distribution of
    the variance values with a chi2 distribution.

    For every ``(variances, n_comp)`` entry and every fit ``i`` in
    ``variances``, the figure (titled ``"Fit i"``) shows a normalised
    histogram of ``variances[i][0]`` and the chi2 pdf with ``n_comp`` degrees
    of freedom. The legend reports the Kolmogorov-Smirnov p-value of the
    values against that chi2.

    A single empty figure is yielded for a dataset whose ``variances`` is
    ``None`` or whose chi2 reference curve cannot be built. If plotting a
    given fit fails, an empty figure is yielded in its place and the
    remaining fits of that dataset are skipped.

    Parameters
    ----------
    principal_components_variance_distribution_datasets: iterable
        One ``(variances, n_comp)`` pair per dataset.
    each_dataset: iterable
        The datasets, in the same order; ``str(ds)`` is used as the label.

    Yields
    ------
    The figure object returned by ``plotutils.subplots``.
    """
    for pc_var_dataset, ds in zip(
        principal_components_variance_distribution_datasets, each_dataset
    ):
        variances, n_comp = pc_var_dataset

        if variances is None:
            # Nothing to plot for this dataset: emit the empty placeholder.
            fig, _ = plotutils.subplots()
            yield fig
            continue

        try:
            vals = np.linspace(0, 3 * n_comp, 100)
            chi2_dist = stats.chi2(df=n_comp)
            chi2_pdf = chi2_dist.pdf(vals)
            fits = iter(variances)
        except Exception:
            # Best-effort fallback. `Exception` (rather than a bare except)
            # lets GeneratorExit, KeyboardInterrupt and SystemExit propagate.
            fig, _ = plotutils.subplots()
            yield fig
            continue

        for i, var_fit in enumerate(fits):
            try:
                pvalue_ks = stats.kstest(var_fit[0], chi2_dist.cdf).pvalue

                fig, ax = plotutils.subplots()
                ax.hist(
                    var_fit[0],
                    density=True,
                    bins='auto',
                    label=f"Variance {str(ds)}, p={pvalue_ks:.3f}",
                )
                ax.plot(vals, chi2_pdf, label=f"chi2, dof={n_comp}")
                ax.set_title(f"Fit {i}")
                ax.legend()
                failed = False
            except Exception:
                fig, _ = plotutils.subplots()
                failed = True

            # Yield outside the try block: an exception raised at the yield
            # point (e.g. when the generator is closed) must not be mistaken
            # for a plotting failure.
            yield fig

            if failed:
                # Give up on the remaining fits of this dataset after the
                # first failure.
                break


@table
def table_bias_variance_datasets(principal_components_bias_variance_datasets, each_dataset):
"""
Expand Down

0 comments on commit bf843e1

Please sign in to comment.