
Commit

Deploying to gh-pages from @ 8befa74 🚀
EtienneCmb committed Sep 13, 2024
1 parent dacf1f5 commit e6039df
Showing 79 changed files with 335 additions and 312 deletions.
Binary file modified _downloads/071683bb9f5d321841ee9dd62eee27a4/plot_sim_red_syn.zip
Binary file modified _downloads/1e58f0eaeee8456990c2959bfc1babe9/plot_oinfo.zip
Binary file modified _downloads/31d2b25c7ef60a5789246c25d83787c6/plot_ml_vs_it.zip
5 changes: 3 additions & 2 deletions _downloads/4481fca80fcd36eb6fde9d9cb4e3134c/plot_entropies.py
@@ -29,13 +29,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
# (Gaussian Copula), the KNN (k Nearest Neighbor) and the kernel-based
# estimator.
# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
# estimator and the histogram estimator.

# list of estimators to compare
metrics = {
"GC": get_entropy("gc"),
"Gaussian": get_entropy(method="gauss"),
"Histogram": get_entropy(method="histogram"),
"KNN-3": get_entropy("knn", k=3),
"Kernel": get_entropy("kernel"),
}
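
As a minimal, editorial usage sketch (not part of this commit), and assuming the callables returned by get_entropy accept an array of shape (n_features, n_samples) as in the gallery code, one of the estimators above can be checked against the closed-form entropy of a standard normal:

import numpy as np
from hoi.core import get_entropy

# one feature, 1000 samples drawn from N(0, 1)
x = np.random.normal(size=(1, 1000))

# Gaussian Copula estimate, in bits
h_gc = get_entropy("gc")(x)

# closed-form differential entropy of N(0, 1) in bits: 0.5 * log2(2 * pi * e)
h_theory = 0.5 * np.log2(2 * np.pi * np.e)

print(float(h_gc), h_theory)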
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based\nand the Gaussian estimators.\n\n"
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based, the\nGaussian estimators and the histogram estimator.\n\n"
]
},
{
@@ -33,7 +33,7 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(\"gauss\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 5"
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(\"gauss\"),\n \"Histogram\": get_entropy(\"histogram\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 5"
]
},
{
Binary file modified _downloads/4edd620b8310fd78193d26c454d1ca13/plot_rsi.zip
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of entropy estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k Nearest Neighbor) and a Gaussian kernel-based estimator.\n\n"
"## Definition of entropy estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k Nearest Neighbor), a Gaussian kernel-based estimator and the histogram\nestimator.\n\n"
]
},
{
@@ -33,14 +33,14 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_entropy(\"gc\", biascorrect=False),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"KNN-10\": get_entropy(\"knn\", k=10),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(100, 10000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(ent, ent_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(ent.keys()):\n # get the entropies\n x = ent[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(\n ent_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Entropy [bits]\")"
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_entropy(\"gc\", biascorrect=False),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"KNN-10\": get_entropy(\"knn\", k=10),\n \"Kernel\": get_entropy(\"kernel\"),\n \"Histogram\": get_entropy(\"histogram\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(ent, ent_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(ent.keys()):\n # get the entropies\n x = ent[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(\n ent_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Entropy [bits]\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Entropy of data sampled from multinormal distribution\n\nLet variables $X_1,X_2,...,X_n$ have a multivariate normal distribution\n$\\mathcal{N}(\\vec{\\mu}, \\Sigma)$ the theoretical entropy in bits is\ndefined by :\n\n\\begin{align}H(X) = \\frac{1}{2} \\times log_{2}({|\\Sigma|}(2\\pi e)^{n})\\end{align}\n\n\n"
"##############################################################################\n Entropy of data sampled from multinormal distribution\n -------------------------------------------\n\n Let variables $X_1,X_2,...,X_n$ have a multivariate normal distribution\n $\\mathcal{N}(\\vec{\\mu}, \\Sigma)$ the theoretical entropy in bits is\n defined by :\n\n .. math::\n H(X) = \\frac{1}{2} \\times log_{2}({|\\Sigma|}(2\\pi e)^{n})\n\n\n"
]
},
{
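
As an editorial, hedged illustration of the formula quoted in the cell above, the theoretical entropy of a multivariate normal can be evaluated directly from its covariance matrix (the covariance values below are an assumption for illustration only):

import numpy as np

# example 2-dimensional covariance matrix (hypothetical values)
cov = np.array([[1.0, 0.5],
                [0.5, 1.0]])
n = cov.shape[0]

# H(X) = 0.5 * log2(|Sigma| * (2 * pi * e) ** n), in bits
h_theory = 0.5 * np.log2(np.linalg.det(cov) * (2 * np.pi * np.e) ** n)
print(h_theory)  # about 3.89 bits for this covariance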
Binary file modified _downloads/58edfc8265b6af8f03b242ea4656b10f/plot_syn_phiID.zip
Binary file modified _downloads/675ecb045af78b81dbd01cbea1d59fa1/plot_entropies_mvar.zip
Binary file modified _downloads/69a8436c098192f96580d8e9cd2f5d4d/plot_entropies.zip
Binary file modified _downloads/6df867479300c4bb75728e18f2fcc139/plot_infotopo.zip
10 changes: 9 additions & 1 deletion _downloads/7aa0a86e987814b09694d9eac79452ec/plot_mi.py
@@ -31,12 +31,19 @@
#
# Let us define several estimators of MI. We are going to use the GC MI
# (Gaussian Copula Mutual Information), the KNN (k Nearest Neighbor) and the
# kernel-based estimator and using the a binning approach.
# kernel-based estimator, a binning approach and the histogram estimator.
# Please note that the histogram estimator is equivalent to the binning
# approach, up to a correction that relates to the difference between the
# Shannon entropy of discrete variables and the differential entropy of
# continuous variables. In the case of mutual information (MI) this
# correction is not needed, because it cancels out when the entropies are
# combined to compute the MI.


# create a special function for the binning approach as it requires discretized data
mi_binning_fcn = get_mi("binning", base=2)


def mi_binning(x, y, **kwargs):
x = digitize(x.T, **kwargs).T
y = digitize(y.T, **kwargs).T
@@ -51,6 +58,7 @@ def mi_binning(x, y, **kwargs):
"KNN-10": get_mi("knn", k=10),
"Kernel": get_mi("kernel"),
"Binning": partial(mi_binning, n_bins=4),
"Histogram": get_mi("histogram", n_bins=4),
}
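
A hedged, pure-NumPy sketch of the point made in the comments above (an editorial addition, not the library's implementation): when MI is computed as H(X) + H(Y) - H(X, Y) on binned data, the bin-width terms that separate discrete Shannon entropy from differential entropy appear with opposite signs and cancel, so no correction is needed.

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=2000)
y = x + rng.normal(size=2000)  # correlated pair
n_bins = 4

def discrete_entropy(labels):
    """Plug-in Shannon entropy (bits) of integer-labelled samples."""
    _, counts = np.unique(labels, return_counts=True, axis=0)
    p = counts / counts.sum()
    return -(p * np.log2(p)).sum()

# discretize each variable into equally spaced bins
xd = np.digitize(x, np.histogram_bin_edges(x, bins=n_bins)[1:-1])
yd = np.digitize(y, np.histogram_bin_edges(y, bins=n_bins)[1:-1])

# MI from plug-in discrete entropies; the per-variable bin-width corrections cancel
mi = discrete_entropy(xd) + discrete_entropy(yd) - discrete_entropy(np.c_[xd, yd])
print(mi)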

# number of samples to simulate data
@@ -24,13 +24,14 @@
# -----------------------------------
#
# Let us define several estimators of entropy. We are going to use the GC
# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based
# and the Gaussian estimators.
# (Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based, the
# Gaussian and the histogram estimators.

# list of estimators to compare
metrics = {
"GC": get_entropy("gc"),
"Gaussian": get_entropy("gauss"),
"Histogram": get_entropy("histogram"),
"KNN-3": get_entropy("knn", k=3),
"Kernel": get_entropy("kernel"),
}
Binary file modified _downloads/9b8182d6a0423d2c5a54190326bb52f0/plot_jax.zip
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor) and the kernel-based\nestimator.\n\n"
"## Definition of estimators of entropy\n\nLet us define several estimators of entropy. We are going to use the GC\n(Gaussian Copula), the KNN (k Nearest Neighbor), the kernel-based\nestimator and the histogram estimator.\n\n"
]
},
{
@@ -33,7 +33,7 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(method=\"gauss\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(h_x, h_theoric):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(h_x.keys()):\n # get the entropies\n x = h_x[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the entropy as a function of the number of samples and interval\n plt.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n plt.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n plt.axhline(\n h_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n plt.legend()\n plt.xlabel(\"Number of samples\")\n plt.ylabel(\"Entropy [bits]\")"
"# list of estimators to compare\nmetrics = {\n \"GC\": get_entropy(\"gc\"),\n \"Gaussian\": get_entropy(method=\"gauss\"),\n \"Histogram\": get_entropy(method=\"histogram\"),\n \"KNN-3\": get_entropy(\"knn\", k=3),\n \"Kernel\": get_entropy(\"kernel\"),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(h_x, h_theoric):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(h_x.keys()):\n # get the entropies\n x = h_x[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the entropy as a function of the number of samples and interval\n plt.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n plt.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n plt.axhline(\n h_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical entropy\"\n )\n plt.legend()\n plt.xlabel(\"Number of samples\")\n plt.ylabel(\"Entropy [bits]\")"
]
},
{
@@ -12,6 +12,7 @@
"""

# %%
import numpy as np

from hoi.core import get_entropy
@@ -25,18 +26,20 @@
# ---------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
# (k Nearest Neighbor) and a Gaussian kernel-based estimator.
# (k Nearest Neighbor), a Gaussian kernel-based estimator and the histogram
# estimator.

# list of estimators to compare
metrics = {
"GCMI": get_entropy("gc", biascorrect=False),
"KNN-3": get_entropy("knn", k=3),
"KNN-10": get_entropy("knn", k=10),
"Kernel": get_entropy("kernel"),
"Histogram": get_entropy("histogram"),
}

# number of samples to simulate data
n_samples = np.geomspace(100, 10000, 10).astype(int)
n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
@@ -68,6 +71,7 @@ def plot(ent, ent_theoric, ax):
ax.set_ylabel("Entropy [bits]")


# %%
###############################################################################
# Entropy of data sampled from multinormal distribution
# -------------------------------------------
@@ -16,6 +16,8 @@
Czyz et al., NeurIPS 2023 :cite:`czyz2024beyond`.
"""

# %%

import numpy as np

from hoi.core import get_mi
@@ -28,18 +30,19 @@
# Definition of MI estimators
# ---------------------------
#
# We are going to use the GCMI (Gaussian Copula Mutual Information) and KNN
# (k Nearest Neighbor)
# We are going to use the GCMI (Gaussian Copula Mutual Information), KNN
# (k Nearest Neighbor) and the histogram estimator.

# list of estimators to compare
metrics = {
"GCMI": get_mi("gc", biascorrect=False),
"KNN-3": get_mi("knn", k=3),
"KNN-10": get_mi("knn", k=10),
"Histogram": get_mi("histogram", n_bins=3),
}

# number of samples to simulate data
n_samples = np.geomspace(1000, 10000, 10).astype(int)
n_samples = np.geomspace(20, 1000, 15).astype(int)

# number of repetitions to estimate the percentile interval
n_repeat = 10
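
As an editorial, hedged check of the estimators defined above (assuming, as in the gallery code, that the functions returned by get_mi take the two variables as arrays of shape (n_features, n_samples)), the estimates can be compared against the closed-form MI of a bivariate normal, -0.5 * log2(1 - rho**2) bits:

import numpy as np
from hoi.core import get_mi

rho = 0.8
cov = np.array([[1.0, rho], [rho, 1.0]])

# simulate correlated pairs and split them into x and y of shape (1, n_samples)
xy = np.random.multivariate_normal([0.0, 0.0], cov, size=5000).T
x, y = xy[[0]], xy[[1]]

mi_est = get_mi("gc", biascorrect=False)(x, y)
mi_theory = -0.5 * np.log2(1 - rho ** 2)
print(float(mi_est), mi_theory)  # both should be close to ~0.74 bits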
@@ -69,6 +72,8 @@ def plot(mi, mi_theoric, ax):
ax.set_ylabel("Mutual-information [bits]")


# %%

###############################################################################
# MI of data sampled from splitted multinormal distribution
# -------------------------------------------
@@ -22,7 +22,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"## Definition of MI estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information) and KNN\n(k Nearest Neighbor)\n\n"
"## Definition of MI estimators\n\nWe are going to use the GCMI (Gaussian Copula Mutual Information), KNN\n(k Nearest Neighbor) and histogram estimator.\n\n"
]
},
{
@@ -33,7 +33,7 @@
},
"outputs": [],
"source": [
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_mi(\"gc\", biascorrect=False),\n \"KNN-3\": get_mi(\"knn\", k=3),\n \"KNN-10\": get_mi(\"knn\", k=10),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(1000, 10000, 10).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(mi, mi_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(mi.keys()):\n # get the entropies\n x = mi[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(mi_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical MI\")\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Mutual-information [bits]\")"
"# list of estimators to compare\nmetrics = {\n \"GCMI\": get_mi(\"gc\", biascorrect=False),\n \"KNN-3\": get_mi(\"knn\", k=3),\n \"KNN-10\": get_mi(\"knn\", k=10),\n \"Histogram\": get_mi(\"histogram\", n_bins=3),\n}\n\n# number of samples to simulate data\nn_samples = np.geomspace(20, 1000, 15).astype(int)\n\n# number of repetitions to estimate the percentile interval\nn_repeat = 10\n\n\n# plotting function\ndef plot(mi, mi_theoric, ax):\n \"\"\"Plotting function.\"\"\"\n for n_m, metric_name in enumerate(mi.keys()):\n # get the entropies\n x = mi[metric_name]\n\n # get the color\n color = f\"C{n_m}\"\n\n # estimate lower and upper bounds of the [5, 95]th percentile interval\n x_low, x_high = np.percentile(x, [5, 95], axis=0)\n\n # plot the MI as a function of the number of samples and interval\n ax.plot(n_samples, x.mean(0), color=color, lw=2, label=metric_name)\n ax.fill_between(n_samples, x_low, x_high, color=color, alpha=0.2)\n\n # plot the theoretical value\n ax.axhline(mi_theoric, linestyle=\"--\", color=\"k\", label=\"Theoretical MI\")\n ax.legend()\n ax.set_xlabel(\"Number of samples\")\n ax.set_ylabel(\"Mutual-information [bits]\")"
]
},
{
Binary file modified _downloads/cb54325e1a49091788b29a83388e5628/plot_mi.zip
