Adding the histogram estimator to the examples. #55

Merged
merged 2 commits into from
Sep 13, 2024
5 changes: 3 additions & 2 deletions examples/it/plot_entropies.py
@@ -29,13 +29,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
-# (Gaussian Copula), the KNN (k Nearest Neighbor) and the kernel-based
-# estimator.
+# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
+# estimator and the histogram estimator.

# list of estimators to compare
metrics = {
    "GC": get_entropy("gc"),
    "Gaussian": get_entropy(method="gauss"),
+    "Histogram": get_entropy(method="histogram"),
    "KNN-3": get_entropy("knn", k=3),
    "Kernel": get_entropy("kernel"),
}
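For context, each value in the metrics dictionary is a plain callable returned by get_entropy. A minimal sketch of how the new histogram estimator slots in next to the others, assuming (as elsewhere in these examples) that each callable accepts an array of shape (n_features, n_samples) and returns the entropy in bits:

import numpy as np

from hoi.core import get_entropy

# univariate Gaussian samples, shape (n_features=1, n_samples)
x = np.random.normal(size=(1, 1000))

# closed-form entropy of a standard normal: 0.5 * log2(2 * pi * e) ~= 2.05 bits
h_theory = 0.5 * np.log2(2 * np.pi * np.e)

# compare a few estimators, including the newly exposed histogram one
for name, fcn in {
    "GC": get_entropy("gc"),
    "Histogram": get_entropy(method="histogram"),
    "KNN-3": get_entropy("knn", k=3),
}.items():
    print(f"{name:10s} {float(fcn(x)):.3f} bits (theory {h_theory:.3f})")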
5 changes: 3 additions & 2 deletions examples/it/plot_entropies_mvar.py
@@ -24,13 +24,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
-# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
-# and the Gaussian estimators.
+# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based, the
+# Gaussian and the histogram estimators.

# list of estimators to compare
metrics = {
    "GC": get_entropy("gc"),
    "Gaussian": get_entropy("gauss"),
+    "Histogram": get_entropy("histogram"),
    "KNN-3": get_entropy("knn", k=3),
    "Kernel": get_entropy("kernel"),
}
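The multivariate example follows the same pattern; the only difference is that the samples carry several features. A short sketch, again assuming the (n_features, n_samples) convention used above, comparing the histogram estimate with the closed-form entropy of a correlated multivariate Gaussian (a finite-sample estimate is not expected to match it exactly):

import numpy as np

from hoi.core import get_entropy

# correlated 3-dimensional Gaussian samples, shape (n_features, n_samples)
cov = np.array([[1.0, 0.5, 0.2],
                [0.5, 1.0, 0.3],
                [0.2, 0.3, 1.0]])
x = np.random.multivariate_normal(np.zeros(3), cov, size=5000).T

# closed-form entropy: 0.5 * log2((2 * pi * e) ** d * det(cov))
h_theory = 0.5 * np.log2((2 * np.pi * np.e) ** 3 * np.linalg.det(cov))

h_hist = get_entropy("histogram")
print(float(h_hist(x)), h_theory)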
8 changes: 6 additions & 2 deletions examples/it/plot_entropy_high_dimensional.py
@@ -12,6 +12,7 @@

"""

+# %%
import numpy as np

from hoi.core import get_entropy
@@ -25,18 +26,20 @@
# ---------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
-# (k Nearest Neighbor) and a Gaussian kernel-based estimator.
+# (k Nearest Neighbor), a Gaussian kernel-based estimator and the histogram
+# estimator.

# list of estimators to compare
metrics = {
    "GCMI": get_entropy("gc", biascorrect=False),
    "KNN-3": get_entropy("knn", k=3),
    "KNN-10": get_entropy("knn", k=10),
    "Kernel": get_entropy("kernel"),
+    "Histogram": get_entropy("histogram"),
}

# number of samples to simulate data
-n_samples = np.geomspace(100, 10000, 10).astype(int)
+n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
@@ -68,6 +71,7 @@ def plot(ent, ent_theoric, ax):
ax.set_ylabel("Entropy [bits]")


+# %%
###############################################################################
# Entropy of data sampled from multinormal distribution
# -------------------------------------------
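The change above shrinks the simulated sample sizes from geomspace(100, 10000, 10) to geomspace(20, 1000, 15), probing the small-sample regime on a finer grid. As a reminder of how such a convergence benchmark is typically structured, here is a sketch under the same shape and unit assumptions as above; it is not the exact plotting code of the example:

import numpy as np

from hoi.core import get_entropy

n_samples = np.geomspace(20, 1000, 15).astype(int)
n_repeat = 10
n_features = 4

# closed-form entropy of a d-dimensional standard normal
h_theory = 0.5 * n_features * np.log2(2 * np.pi * np.e)

est = get_entropy("histogram")

# estimates[i, j]: entropy from the j-th draw of n_samples[i] samples
estimates = np.zeros((len(n_samples), n_repeat))
for i, n_s in enumerate(n_samples):
    for j in range(n_repeat):
        x = np.random.normal(size=(n_features, n_s))
        estimates[i, j] = float(est(x))

# median and percentile interval across repetitions, as plotted in the example
med = np.median(estimates, axis=1)
p5, p95 = np.percentile(estimates, [5, 95], axis=1)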
10 changes: 9 additions & 1 deletion examples/it/plot_mi.py
@@ -31,12 +31,19 @@
#
# Let us define several estimators of MI. We are going to use the GC MI
# (Gaussian Copula Mutual Information), the KNN (k Nearest Neighbor) and the
-# kernel-based estimator and using the a binning approach.
+# kernel-based estimator, a binning approach and the histogram estimator.
+# Please note that the histogram estimator is equivalent to binning, up to a
+# correction that relates to the difference between the Shannon entropy of
+# discrete variables and the differential entropy of continuous variables.
+# In the case of mutual information (MI) this correction is not needed,
+# because the per-variable corrections cancel out when the entropies are
+# combined to compute the MI.


# create a special function for the binning approach as it requires binary data
mi_binning_fcn = get_mi("binning", base=2)


def mi_binning(x, y, **kwargs):
    x = digitize(x.T, **kwargs).T
    y = digitize(y.T, **kwargs).T
@@ -51,6 +58,7 @@ def mi_binning(x, y, **kwargs):
"KNN-10": get_mi("knn", k=10),
"Kernel": get_mi("kernel"),
"Binning": partial(mi_binning, n_bins=4),
"Histogram": get_mi("histogram", n_bins=4),
}

# number of samples to simulate data
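The comment added above states that the histogram estimator matches the binning approach for MI because the correction separating discrete (Shannon) entropy from differential entropy cancels. A numpy-only sketch of that cancellation, independent of hoi's own estimators (names and bin counts here are illustrative):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=2000)
y = x + rng.normal(size=2000)
n_bins = 4


def hist_entropies(samples, bins):
    """Return (Shannon entropy of the bins, differential-entropy estimate)."""
    counts, edges = np.histogramdd(samples, bins=bins)
    p = counts[counts > 0] / counts.sum()
    h_disc = -np.sum(p * np.log2(p))  # discrete (Shannon) entropy of the bins
    log_widths = [np.log2(e[1] - e[0]) for e in edges]  # one bin width per dim
    return h_disc, h_disc + np.sum(log_widths)  # add the log bin-volume term


hx_d, hx_c = hist_entropies(x[:, None], n_bins)
hy_d, hy_c = hist_entropies(y[:, None], n_bins)
hxy_d, hxy_c = hist_entropies(np.c_[x, y], n_bins)

# each log2(bin width) appears once in H(X) + H(Y) and once in H(X, Y),
# so the corrections cancel in I = H(X) + H(Y) - H(X, Y)
print(np.isclose(hx_d + hy_d - hxy_d, hx_c + hy_c - hxy_c))  # True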
11 changes: 8 additions & 3 deletions examples/it/plot_mi_high_dimensional.py
@@ -16,6 +16,8 @@
Czyz et al., NeurIPS 2023 :cite:`czyz2024beyond`.
"""

+# %%

import numpy as np

from hoi.core import get_mi
@@ -28,18 +30,19 @@
# Definition of MI estimators
# ---------------------------
#
-# We are going to use the GCMI (Gaussian Copula Mutual Information) and KNN
-# (k Nearest Neighbor)
+# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
+# (k Nearest Neighbor) and the histogram estimator.

# list of estimators to compare
metrics = {
    "GCMI": get_mi("gc", biascorrect=False),
    "KNN-3": get_mi("knn", k=3),
    "KNN-10": get_mi("knn", k=10),
+    "Histogram": get_mi("histogram", n_bins=3),
}

# number of samples to simulate data
-n_samples = np.geomspace(1000, 10000, 10).astype(int)
+n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
@@ -69,6 +72,8 @@ def plot(mi, mi_theoric, ax):
ax.set_ylabel("Mutual-information [bits]")


+# %%

###############################################################################
# MI of data sampled from splitted multinormal distribution
# -------------------------------------------
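Finally, a sketch of how the MI estimators above, including the histogram one, can be checked against the closed-form MI of a correlated bivariate Gaussian, I = -0.5 * log2(1 - rho ** 2). It assumes, as in this example, that the callables returned by get_mi take x and y of shape (n_features, n_samples) and return values in bits; finite-sample estimates will only approximate the closed form.

import numpy as np

from hoi.core import get_mi

rho, n = 0.8, 1000
cov = np.array([[1.0, rho], [rho, 1.0]])
xy = np.random.multivariate_normal([0.0, 0.0], cov, size=n)
x, y = xy[:, [0]].T, xy[:, [1]].T  # each of shape (1, n_samples)

mi_theory = -0.5 * np.log2(1 - rho ** 2)

for name, fcn in {
    "GCMI": get_mi("gc", biascorrect=False),
    "KNN-3": get_mi("knn", k=3),
    "Histogram": get_mi("histogram", n_bins=3),
}.items():
    print(f"{name:10s} {float(fcn(x, y)):.3f} bits (theory {mi_theory:.3f})")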