diff --git a/examples/it/plot_entropies.py b/examples/it/plot_entropies.py
index cb6ba476..52040372 100644
--- a/examples/it/plot_entropies.py
+++ b/examples/it/plot_entropies.py
@@ -29,13 +29,14 @@
 # -----------------------------------
 #
 # Let us define several estimators of entropy. We are going to use the GC
-# (Gaussian Copula), the KNN (k Nearest Neighbor) and the kernel-based
-# estimator.
+# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
+# estimator and the histogram estimator.
 
 # list of estimators to compare
 metrics = {
     "GC": get_entropy("gc"),
     "Gaussian": get_entropy(method="gauss"),
+    "Histogram": get_entropy(method="histogram"),
     "KNN-3": get_entropy("knn", k=3),
     "Kernel": get_entropy("kernel"),
 }
diff --git a/examples/it/plot_entropies_mvar.py b/examples/it/plot_entropies_mvar.py
index 4dc3dfd7..6dfdceba 100644
--- a/examples/it/plot_entropies_mvar.py
+++ b/examples/it/plot_entropies_mvar.py
@@ -24,13 +24,14 @@
 # -----------------------------------
 #
 # Let us define several estimators of entropy. We are going to use the GC
-# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
-# and the Gaussian estimators.
+# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based, the
+# Gaussian and the histogram estimators.
 
 # list of estimators to compare
 metrics = {
     "GC": get_entropy("gc"),
     "Gaussian": get_entropy("gauss"),
+    "Histogram": get_entropy("histogram"),
     "KNN-3": get_entropy("knn", k=3),
     "Kernel": get_entropy("kernel"),
 }
diff --git a/examples/it/plot_entropy_high_dimensional.py b/examples/it/plot_entropy_high_dimensional.py
index 19a8e5c8..b2ed7138 100644
--- a/examples/it/plot_entropy_high_dimensional.py
+++ b/examples/it/plot_entropy_high_dimensional.py
@@ -12,6 +12,7 @@
 
 """
 
+# %%
 import numpy as np
 
 from hoi.core import get_entropy
@@ -25,7 +26,8 @@
 # ---------------------------
 #
 # We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
-# (k Nearest Neighbor) and a Gaussian kernel-based estimator.
+# (k Nearest Neighbor), a Gaussian kernel-based estimator and the histogram
+# estimator.
 
 # list of estimators to compare
 metrics = {
@@ -33,10 +35,11 @@
     "KNN-3": get_entropy("knn", k=3),
     "KNN-10": get_entropy("knn", k=10),
     "Kernel": get_entropy("kernel"),
+    "Histogram": get_entropy("histogram"),
 }
 
 # number of samples to simulate data
-n_samples = np.geomspace(100, 10000, 10).astype(int)
+n_samples = np.geomspace(20, 1000, 15).astype(int)
 
 # number of repetitions to estimate the percentile interval
 n_repeat = 10
@@ -68,6 +71,7 @@ def plot(ent, ent_theoric, ax):
     ax.set_ylabel("Entropy [bits]")
 
 
+# %%
###############################################################################
 # Entropy of data sampled from multinormal distribution
 # -------------------------------------------
diff --git a/examples/it/plot_mi.py b/examples/it/plot_mi.py
index 454dc6c5..08cd5d00 100644
--- a/examples/it/plot_mi.py
+++ b/examples/it/plot_mi.py
@@ -31,12 +31,19 @@
 #
 # Let us define several estimators of MI. We are going to use the GC MI
 # (Gaussian Copula Mutual Information), the KNN (k Nearest Neighbor) and the
-# kernel-based estimator and using the a binning approach.
+# kernel-based estimator, as well as a binning approach and the histogram
+# estimator. Please note that the histogram estimator is equivalent to the
+# binning approach, up to a correction that relates to the difference between
+# the Shannon entropy of discrete variables and the differential entropy of
+# continuous variables. This correction is not needed for mutual information
+# (MI), because the differences between discrete and differential entropies
+# cancel out when the entropy terms are combined to compute the MI.
 
 
 # create a special function for the binning approach as it requires binary data
 mi_binning_fcn = get_mi("binning", base=2)
 
+
 def mi_binning(x, y, **kwargs):
     x = digitize(x.T, **kwargs).T
     y = digitize(y.T, **kwargs).T
@@ -51,6 +58,7 @@ def mi_binning(x, y, **kwargs):
     "KNN-10": get_mi("knn", k=10),
     "Kernel": get_mi("kernel"),
     "Binning": partial(mi_binning, n_bins=4),
+    "Histogram": get_mi("histogram", n_bins=4),
 }
 
 # number of samples to simulate data
diff --git a/examples/it/plot_mi_high_dimensional.py b/examples/it/plot_mi_high_dimensional.py
index 4c409a4b..9325063d 100644
--- a/examples/it/plot_mi_high_dimensional.py
+++ b/examples/it/plot_mi_high_dimensional.py
@@ -16,6 +16,8 @@
 Czyz et al., NeurIPS 2023 :cite:`czyz2024beyond`.
 """
 
+# %%
+
 import numpy as np
 
 from hoi.core import get_mi
@@ -28,18 +30,19 @@
 # Definition of MI estimators
 # ---------------------------
 #
-# We are going to use the GCMI (Gaussian Copula Mutual Information) and KNN
-# (k Nearest Neighbor)
+# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
+# (k Nearest Neighbor) and histogram estimators.
 
 # list of estimators to compare
 metrics = {
     "GCMI": get_mi("gc", biascorrect=False),
     "KNN-3": get_mi("knn", k=3),
     "KNN-10": get_mi("knn", k=10),
+    "Histogram": get_mi("histogram", n_bins=3),
 }
 
 # number of samples to simulate data
-n_samples = np.geomspace(1000, 10000, 10).astype(int)
+n_samples = np.geomspace(20, 1000, 15).astype(int)
 
 # number of repetitions to estimate the percentile interval
 n_repeat = 10
@@ -69,6 +72,8 @@ def plot(mi, mi_theoric, ax):
     ax.set_ylabel("Mutual-information [bits]")
 
 
+# %%
+
###############################################################################
 # MI of data sampled from splitted multinormal distribution
 # -------------------------------------------
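
The cancellation argument added to plot_mi.py can be checked numerically. The sketch below uses NumPy only and does not call the hoi estimators; it assumes the standard bin-width relation between differential and discrete entropy, h(X) ≈ H(X_binned) + log2(bin width), which may differ in detail from the correction implemented by the histogram estimator. The log2 bin-width terms added to H(X), H(Y) and H(X, Y) cancel in I(X; Y) = H(X) + H(Y) - H(X, Y), so the binning-based and histogram-based MI coincide.

# Minimal numerical check of the cancellation argument (NumPy only, no hoi
# calls); the bin-width correction is an assumption, see the note above.
import numpy as np

rng = np.random.default_rng(0)
n_samples, n_bins = 10_000, 8

# two correlated 1D variables
x = rng.standard_normal(n_samples)
y = 0.6 * x + 0.8 * rng.standard_normal(n_samples)


def discrete_entropy(counts):
    """Shannon entropy (in bits) of a histogram, given its bin counts."""
    p = counts[counts > 0] / counts.sum()
    return -(p * np.log2(p)).sum()


# uniform bins and the corresponding bin widths
x_edges = np.linspace(x.min(), x.max(), n_bins + 1)
y_edges = np.linspace(y.min(), y.max(), n_bins + 1)
dx, dy = np.diff(x_edges)[0], np.diff(y_edges)[0]

# discrete (binned) entropies
h_x = discrete_entropy(np.histogram(x, bins=x_edges)[0])
h_y = discrete_entropy(np.histogram(y, bins=y_edges)[0])
h_xy = discrete_entropy(np.histogram2d(x, y, bins=[x_edges, y_edges])[0].ravel())

# differential-style (histogram) entropies: discrete entropy + log2 bin width
hd_x = h_x + np.log2(dx)
hd_y = h_y + np.log2(dy)
hd_xy = h_xy + np.log2(dx) + np.log2(dy)

# the corrections cancel, so both MI values are identical
mi_from_discrete = h_x + h_y - h_xy
mi_from_differential = hd_x + hd_y - hd_xy
print(np.isclose(mi_from_discrete, mi_from_differential))  # True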