Adding the histogram estimator to the examples. #55

Merged
merged 2 commits into from
Sep 13, 2024
5 changes: 3 additions & 2 deletions examples/it/plot_entropies.py
@@ -29,13 +29,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
-# (Gaussian Copula), the KNN (k Nearest Neighbor) and the kernel-based
-# estimator.
+# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
+# estimator and the histogram estimator.

# list of estimators to compare
metrics = {
    "GC": get_entropy("gc"),
    "Gaussian": get_entropy(method="gauss"),
+    "Histogram": get_entropy(method="histogram"),
    "KNN-3": get_entropy("knn", k=3),
    "Kernel": get_entropy("kernel"),
}
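For context, each value in the metrics dictionary is a plain callable returned by get_entropy. A minimal sketch of how the new histogram estimator slots in next to the others, assuming (as elsewhere in these examples) that each callable accepts an array of shape (n_features, n_samples) and returns the entropy in bits:

import numpy as np

from hoi.core import get_entropy

# univariate Gaussian samples, shape (n_features=1, n_samples)
x = np.random.normal(size=(1, 1000))

# closed-form entropy of a standard normal: 0.5 * log2(2 * pi * e) ~= 2.05 bits
h_theory = 0.5 * np.log2(2 * np.pi * np.e)

# compare a few estimators, including the newly exposed histogram one
for name, fcn in {
    "GC": get_entropy("gc"),
    "Histogram": get_entropy(method="histogram"),
    "KNN-3": get_entropy("knn", k=3),
}.items():
    print(f"{name:10s} {float(fcn(x)):.3f} bits (theory {h_theory:.3f})")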
5 changes: 3 additions & 2 deletions examples/it/plot_entropies_mvar.py
@@ -24,13 +24,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
-# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
-# and the Gaussian estimators.
+# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based, the
+# Gaussian and the histogram estimators.

# list of estimators to compare
metrics = {
    "GC": get_entropy("gc"),
    "Gaussian": get_entropy("gauss"),
+    "Histogram": get_entropy("histogram"),
    "KNN-3": get_entropy("knn", k=3),
    "Kernel": get_entropy("kernel"),
}
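The multivariate example follows the same pattern; the only difference is that the samples carry several features. A short sketch, again assuming the (n_features, n_samples) convention used above, comparing the histogram estimate with the closed-form entropy of a correlated multivariate Gaussian (a finite-sample estimate is not expected to match it exactly):

import numpy as np

from hoi.core import get_entropy

# correlated 3-dimensional Gaussian samples, shape (n_features, n_samples)
cov = np.array([[1.0, 0.5, 0.2],
                [0.5, 1.0, 0.3],
                [0.2, 0.3, 1.0]])
x = np.random.multivariate_normal(np.zeros(3), cov, size=5000).T

# closed-form entropy: 0.5 * log2((2 * pi * e) ** d * det(cov))
h_theory = 0.5 * np.log2((2 * np.pi * np.e) ** 3 * np.linalg.det(cov))

h_hist = get_entropy("histogram")
print(float(h_hist(x)), h_theory)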
8 changes: 6 additions & 2 deletions examples/it/plot_entropy_high_dimensional.py
@@ -12,6 +12,7 @@

"""

+# %%
import numpy as np

from hoi.core import get_entropy
@@ -25,18 +26,20 @@
# ---------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
-# (k Nearest Neighbor) and a Gaussian kernel-based estimator.
+# (k Nearest Neighbor), a Gaussian kernel-based estimator and the histogram
+# estimator.

# list of estimators to compare
metrics = {
    "GCMI": get_entropy("gc", biascorrect=False),
    "KNN-3": get_entropy("knn", k=3),
    "KNN-10": get_entropy("knn", k=10),
    "Kernel": get_entropy("kernel"),
+    "Histogram": get_entropy("histogram"),
}

# number of samples to simulate data
-n_samples = np.geomspace(100, 10000, 10).astype(int)
+n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
@@ -68,6 +71,7 @@ def plot(ent, ent_theoric, ax):
ax.set_ylabel("Entropy [bits]")


+# %%
###############################################################################
# Entropy of data sampled from multinormal distribution
# -------------------------------------------
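The change above shrinks the simulated sample sizes from geomspace(100, 10000, 10) to geomspace(20, 1000, 15), probing the small-sample regime on a finer grid. As a reminder of how such a convergence benchmark is typically structured, here is a sketch under the same shape and unit assumptions as above; it is not the exact plotting code of the example:

import numpy as np

from hoi.core import get_entropy

n_samples = np.geomspace(20, 1000, 15).astype(int)
n_repeat = 10
n_features = 4

# closed-form entropy of a d-dimensional standard normal
h_theory = 0.5 * n_features * np.log2(2 * np.pi * np.e)

est = get_entropy("histogram")

# estimates[i, j]: entropy from the j-th draw of n_samples[i] samples
estimates = np.zeros((len(n_samples), n_repeat))
for i, n_s in enumerate(n_samples):
    for j in range(n_repeat):
        x = np.random.normal(size=(n_features, n_s))
        estimates[i, j] = float(est(x))

# median and percentile interval across repetitions, as plotted in the example
med = np.median(estimates, axis=1)
p5, p95 = np.percentile(estimates, [5, 95], axis=1)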
10 changes: 9 additions & 1 deletion examples/it/plot_mi.py
@@ -31,12 +31,19 @@
#
# Let us define several estimators of MI. We are going to use the GC MI
# (Gaussian Copula Mutual Information), the KNN (k Nearest Neighbor) and the
-# kernel-based estimator and using the a binning approach.
+# kernel-based estimator, a binning approach and the histogram estimator.
+# Please note that the histogram estimator is equivalent to binning, up to a
+# correction that relates to the difference between the Shannon entropy of
+# discrete variables and the differential entropy of continuous variables.
+# In the case of mutual information (MI) this correction is not needed,
+# because the per-variable corrections cancel out when the entropies are
+# combined to compute the MI.


# create a special function for the binning approach as it requires binary data
mi_binning_fcn = get_mi("binning", base=2)


def mi_binning(x, y, **kwargs):
    x = digitize(x.T, **kwargs).T
    y = digitize(y.T, **kwargs).T
@@ -51,6 +58,7 @@ def mi_binning(x, y, **kwargs):
"KNN-10": get_mi("knn", k=10),
"Kernel": get_mi("kernel"),
"Binning": partial(mi_binning, n_bins=4),
"Histogram": get_mi("histogram", n_bins=4),
}

# number of samples to simulate data
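The comment added above states that the histogram estimator matches the binning approach for MI because the correction separating discrete (Shannon) entropy from differential entropy cancels. A numpy-only sketch of that cancellation, independent of hoi's own estimators (names and bin counts here are illustrative):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=2000)
y = x + rng.normal(size=2000)
n_bins = 4


def hist_entropies(samples, bins):
    """Return (Shannon entropy of the bins, differential-entropy estimate)."""
    counts, edges = np.histogramdd(samples, bins=bins)
    p = counts[counts > 0] / counts.sum()
    h_disc = -np.sum(p * np.log2(p))  # discrete (Shannon) entropy of the bins
    log_widths = [np.log2(e[1] - e[0]) for e in edges]  # one bin width per dim
    return h_disc, h_disc + np.sum(log_widths)  # add the log bin-volume term


hx_d, hx_c = hist_entropies(x[:, None], n_bins)
hy_d, hy_c = hist_entropies(y[:, None], n_bins)
hxy_d, hxy_c = hist_entropies(np.c_[x, y], n_bins)

# each log2(bin width) appears once in H(X) + H(Y) and once in H(X, Y),
# so the corrections cancel in I = H(X) + H(Y) - H(X, Y)
print(np.isclose(hx_d + hy_d - hxy_d, hx_c + hy_c - hxy_c))  # True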
11 changes: 8 additions & 3 deletions examples/it/plot_mi_high_dimensional.py
@@ -16,6 +16,8 @@
Czyz et al., NeurIPS 2023 :cite:`czyz2024beyond`.
"""

+# %%

import numpy as np

from hoi.core import get_mi
@@ -28,18 +30,19 @@
# Definition of MI estimators
# ---------------------------
#
-# We are going to use the GCMI (Gaussian Copula Mutual Information) and KNN
-# (k Nearest Neighbor)
+# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
+# (k Nearest Neighbor) and the histogram estimator.

# list of estimators to compare
metrics = {
    "GCMI": get_mi("gc", biascorrect=False),
    "KNN-3": get_mi("knn", k=3),
    "KNN-10": get_mi("knn", k=10),
+    "Histogram": get_mi("histogram", n_bins=3),
}

# number of samples to simulate data
-n_samples = np.geomspace(1000, 10000, 10).astype(int)
+n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
@@ -69,6 +72,8 @@ def plot(mi, mi_theoric, ax):
ax.set_ylabel("Mutual-information [bits]")


+# %%

###############################################################################
# MI of data sampled from splitted multinormal distribution
# -------------------------------------------
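Finally, a sketch of how the MI estimators above, including the histogram one, can be checked against the closed-form MI of a correlated bivariate Gaussian, I = -0.5 * log2(1 - rho ** 2). It assumes, as in this example, that the callables returned by get_mi take x and y of shape (n_features, n_samples) and return values in bits; finite-sample estimates will only approximate the closed form.

import numpy as np

from hoi.core import get_mi

rho, n = 0.8, 1000
cov = np.array([[1.0, rho], [rho, 1.0]])
xy = np.random.multivariate_normal([0.0, 0.0], cov, size=n)
x, y = xy[:, [0]].T, xy[:, [1]].T  # each of shape (1, n_samples)

mi_theory = -0.5 * np.log2(1 - rho ** 2)

for name, fcn in {
    "GCMI": get_mi("gc", biascorrect=False),
    "KNN-3": get_mi("knn", k=3),
    "Histogram": get_mi("histogram", n_bins=3),
}.items():
    print(f"{name:10s} {float(fcn(x, y)):.3f} bits (theory {mi_theory:.3f})")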